Completed
Push — master ( 17afdd...b3e3d0 )
by Lars
03:14
created

UTF8::str_iends_with()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 8

Duplication

Lines 15
Ratio 100 %

Code Coverage

Tests 8
CRAP Score 3.0123

Importance

Changes 0
Metric Value
dl 15
loc 15
ccs 8
cts 9
cp 0.8889
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 8
nc 3
nop 2
crap 3.0123
1
<?php
2
3
namespace voku\helper;
4
5
use Symfony\Polyfill\Intl\Grapheme\Grapheme;
6
use Symfony\Polyfill\Xml\Xml;
7
8
/**
9
 * UTF8-Helper-Class
10
 *
11
 * @package voku\helper
12
 */
13
final class UTF8
14
{
15
  /**
16
   * @var array
17
   */
18
  private static $win1252ToUtf8 = array(
19
      128 => "\xe2\x82\xac", // EURO SIGN
20
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
21
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
22
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
23
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
24
      134 => "\xe2\x80\xa0", // DAGGER
25
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
26
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
27
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
28
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
29
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
30
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
31
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
32
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
33
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
34
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
35
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
36
      149 => "\xe2\x80\xa2", // BULLET
37
      150 => "\xe2\x80\x93", // EN DASH
38
      151 => "\xe2\x80\x94", // EM DASH
39
      152 => "\xcb\x9c", // SMALL TILDE
40
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
41
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
42
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
43
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
44
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
45
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
46
  );
47
48
  /**
49
   * @var array
50
   */
51
  private static $cp1252ToUtf8 = array(
52
      '€' => '€',
53
      '‚' => '‚',
54
      'ƒ' => 'ƒ',
55
      '„' => '„',
56
      '…' => '…',
57
      '†' => '†',
58
      '‡' => '‡',
59
      'ˆ' => 'ˆ',
60
      '‰' => '‰',
61
      'Š' => 'Š',
62
      '‹' => '‹',
63
      'Œ' => 'Œ',
64
      'Ž' => 'Ž',
65
      '‘' => '‘',
66
      '’' => '’',
67
      '“' => '“',
68
      '”' => '”',
69
      '•' => '•',
70
      '–' => '–',
71
      '—' => '—',
72
      '˜' => '˜',
73
      '™' => '™',
74
      'š' => 'š',
75
      '›' => '›',
76
      'œ' => 'œ',
77
      'ž' => 'ž',
78
      'Ÿ' => 'Ÿ',
79
  );
80
81
  /**
82
   * Bom => Byte-Length
83
   *
84
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
85
   *
86
   * @var array
87
   */
88
  private static $bom = array(
89
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
90
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
91
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
92
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
93
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
94
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
95
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
96
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
0 ignored issues
show
Unused Code Comprehensibility introduced by
36% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
97
  );
98
99
  /**
100
   * Numeric code point => UTF-8 Character
101
   *
102
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
103
   *
104
   * @var array
105
   */
106
  private static $whitespace = array(
107
    // NUL Byte
108
    0     => "\x0",
109
    // Tab
110
    9     => "\x9",
111
    // New Line
112
    10    => "\xa",
113
    // Vertical Tab
114
    11    => "\xb",
115
    // Carriage Return
116
    13    => "\xd",
117
    // Ordinary Space
118
    32    => "\x20",
119
    // NO-BREAK SPACE
120
    160   => "\xc2\xa0",
121
    // OGHAM SPACE MARK
122
    5760  => "\xe1\x9a\x80",
123
    // MONGOLIAN VOWEL SEPARATOR
124
    6158  => "\xe1\xa0\x8e",
125
    // EN QUAD
126
    8192  => "\xe2\x80\x80",
127
    // EM QUAD
128
    8193  => "\xe2\x80\x81",
129
    // EN SPACE
130
    8194  => "\xe2\x80\x82",
131
    // EM SPACE
132
    8195  => "\xe2\x80\x83",
133
    // THREE-PER-EM SPACE
134
    8196  => "\xe2\x80\x84",
135
    // FOUR-PER-EM SPACE
136
    8197  => "\xe2\x80\x85",
137
    // SIX-PER-EM SPACE
138
    8198  => "\xe2\x80\x86",
139
    // FIGURE SPACE
140
    8199  => "\xe2\x80\x87",
141
    // PUNCTUATION SPACE
142
    8200  => "\xe2\x80\x88",
143
    // THIN SPACE
144
    8201  => "\xe2\x80\x89",
145
    //HAIR SPACE
146
    8202  => "\xe2\x80\x8a",
147
    // LINE SEPARATOR
148
    8232  => "\xe2\x80\xa8",
149
    // PARAGRAPH SEPARATOR
150
    8233  => "\xe2\x80\xa9",
151
    // NARROW NO-BREAK SPACE
152
    8239  => "\xe2\x80\xaf",
153
    // MEDIUM MATHEMATICAL SPACE
154
    8287  => "\xe2\x81\x9f",
155
    // IDEOGRAPHIC SPACE
156
    12288 => "\xe3\x80\x80",
157
  );
158
159
  /**
160
   * @var array
161
   */
162
  private static $whitespaceTable = array(
163
      'SPACE'                     => "\x20",
164
      'NO-BREAK SPACE'            => "\xc2\xa0",
165
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
166
      'EN QUAD'                   => "\xe2\x80\x80",
167
      'EM QUAD'                   => "\xe2\x80\x81",
168
      'EN SPACE'                  => "\xe2\x80\x82",
169
      'EM SPACE'                  => "\xe2\x80\x83",
170
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
171
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
172
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
173
      'FIGURE SPACE'              => "\xe2\x80\x87",
174
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
175
      'THIN SPACE'                => "\xe2\x80\x89",
176
      'HAIR SPACE'                => "\xe2\x80\x8a",
177
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
178
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
179
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
180
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
181
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
182
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
183
  );
184
185
  /**
186
   * bidirectional text chars
187
   *
188
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
189
   *
190
   * @var array
191
   */
192
  private static $bidiUniCodeControlsTable = array(
193
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
194
    8234 => "\xE2\x80\xAA",
195
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
196
    8235 => "\xE2\x80\xAB",
197
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
198
    8236 => "\xE2\x80\xAC",
199
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
200
    8237 => "\xE2\x80\xAD",
201
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
202
    8238 => "\xE2\x80\xAE",
203
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
204
    8294 => "\xE2\x81\xA6",
205
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
206
    8295 => "\xE2\x81\xA7",
207
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
208
    8296 => "\xE2\x81\xA8",
209
    // POP DIRECTIONAL ISOLATE
210
    8297 => "\xE2\x81\xA9",
211
  );
212
213
  /**
214
   * @var array
215
   */
216
  private static $commonCaseFold = array(
217
      'ſ'            => 's',
218
      "\xCD\x85"     => 'ι',
219
      'ς'            => 'σ',
220
      "\xCF\x90"     => 'β',
221
      "\xCF\x91"     => 'θ',
222
      "\xCF\x95"     => 'φ',
223
      "\xCF\x96"     => 'π',
224
      "\xCF\xB0"     => 'κ',
225
      "\xCF\xB1"     => 'ρ',
226
      "\xCF\xB5"     => 'ε',
227
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
228
      "\xE1\xBE\xBE" => 'ι',
229
  );
230
231
  /**
232
   * @var array
233
   */
234
  private static $brokenUtf8ToUtf8 = array(
235
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
236
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
237
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
238
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
239
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
240
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
241
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
242
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
243
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
244
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
245
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
246
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
247
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
248
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
249
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
250
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
251
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
252
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
253
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
254
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
255
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
256
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
257
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
258
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
259
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
260
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
261
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
262
      'ü'       => 'ü',
263
      'ä'       => 'ä',
264
      'ö'       => 'ö',
265
      'Ö'       => 'Ö',
266
      'ß'       => 'ß',
267
      'Ã '       => 'à',
268
      'á'       => 'á',
269
      'â'       => 'â',
270
      'ã'       => 'ã',
271
      'ù'       => 'ù',
272
      'ú'       => 'ú',
273
      'û'       => 'û',
274
      'Ù'       => 'Ù',
275
      'Ú'       => 'Ú',
276
      'Û'       => 'Û',
277
      'Ü'       => 'Ü',
278
      'ò'       => 'ò',
279
      'ó'       => 'ó',
280
      'ô'       => 'ô',
281
      'è'       => 'è',
282
      'é'       => 'é',
283
      'ê'       => 'ê',
284
      'ë'       => 'ë',
285
      'À'       => 'À',
286
      'Á'       => 'Á',
287
      'Â'       => 'Â',
288
      'Ã'       => 'Ã',
289
      'Ä'       => 'Ä',
290
      'Ã…'       => 'Å',
291
      'Ç'       => 'Ç',
292
      'È'       => 'È',
293
      'É'       => 'É',
294
      'Ê'       => 'Ê',
295
      'Ë'       => 'Ë',
296
      'ÃŒ'       => 'Ì',
297
      'Í'       => 'Í',
298
      'ÃŽ'       => 'Î',
299
      'Ï'       => 'Ï',
300
      'Ñ'       => 'Ñ',
301
      'Ã’'       => 'Ò',
302
      'Ó'       => 'Ó',
303
      'Ô'       => 'Ô',
304
      'Õ'       => 'Õ',
305
      'Ø'       => 'Ø',
306
      'Ã¥'       => 'å',
307
      'æ'       => 'æ',
308
      'ç'       => 'ç',
309
      'ì'       => 'ì',
310
      'í'       => 'í',
311
      'î'       => 'î',
312
      'ï'       => 'ï',
313
      'ð'       => 'ð',
314
      'ñ'       => 'ñ',
315
      'õ'       => 'õ',
316
      'ø'       => 'ø',
317
      'ý'       => 'ý',
318
      'ÿ'       => 'ÿ',
319
      '€'      => '€',
320
  );
321
322
  /**
323
   * @var array
324
   */
325
  private static $utf8ToWin1252 = array(
326
      "\xe2\x82\xac" => "\x80", // EURO SIGN
327
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
328
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
329
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
330
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
331
      "\xe2\x80\xa0" => "\x86", // DAGGER
332
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
333
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
334
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
335
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
336
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
337
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
338
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
339
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
340
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
341
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
342
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
343
      "\xe2\x80\xa2" => "\x95", // BULLET
344
      "\xe2\x80\x93" => "\x96", // EN DASH
345
      "\xe2\x80\x94" => "\x97", // EM DASH
346
      "\xcb\x9c"     => "\x98", // SMALL TILDE
347
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
348
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
349
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
350
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
351
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
352
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
353
  );
354
355
  /**
356
   * @var array
357
   */
358
  private static $utf8MSWord = array(
359
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
360
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
361
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
362
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
363
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
364
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
365
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
366
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
367
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
368
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
369
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
370
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
371
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
372
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
373
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
374
  );
375
376
  private static $iconvEncoding = array(
377
      'ANSI_X3.4-1968',
378
      'ANSI_X3.4-1986',
379
      'ASCII',
380
      'CP367',
381
      'IBM367',
382
      'ISO-IR-6',
383
      'ISO646-US',
384
      'ISO_646.IRV:1991',
385
      'US',
386
      'US-ASCII',
387
      'CSASCII',
388
      'UTF-8',
389
      'ISO-10646-UCS-2',
390
      'UCS-2',
391
      'CSUNICODE',
392
      'UCS-2BE',
393
      'UNICODE-1-1',
394
      'UNICODEBIG',
395
      'CSUNICODE11',
396
      'UCS-2LE',
397
      'UNICODELITTLE',
398
      'ISO-10646-UCS-4',
399
      'UCS-4',
400
      'CSUCS4',
401
      'UCS-4BE',
402
      'UCS-4LE',
403
      'UTF-16',
404
      'UTF-16BE',
405
      'UTF-16LE',
406
      'UTF-32',
407
      'UTF-32BE',
408
      'UTF-32LE',
409
      'UNICODE-1-1-UTF-7',
410
      'UTF-7',
411
      'CSUNICODE11UTF7',
412
      'UCS-2-INTERNAL',
413
      'UCS-2-SWAPPED',
414
      'UCS-4-INTERNAL',
415
      'UCS-4-SWAPPED',
416
      'C99',
417
      'JAVA',
418
      'CP819',
419
      'IBM819',
420
      'ISO-8859-1',
421
      'ISO-IR-100',
422
      'ISO8859-1',
423
      'ISO_8859-1',
424
      'ISO_8859-1:1987',
425
      'L1',
426
      'LATIN1',
427
      'CSISOLATIN1',
428
      'ISO-8859-2',
429
      'ISO-IR-101',
430
      'ISO8859-2',
431
      'ISO_8859-2',
432
      'ISO_8859-2:1987',
433
      'L2',
434
      'LATIN2',
435
      'CSISOLATIN2',
436
      'ISO-8859-3',
437
      'ISO-IR-109',
438
      'ISO8859-3',
439
      'ISO_8859-3',
440
      'ISO_8859-3:1988',
441
      'L3',
442
      'LATIN3',
443
      'CSISOLATIN3',
444
      'ISO-8859-4',
445
      'ISO-IR-110',
446
      'ISO8859-4',
447
      'ISO_8859-4',
448
      'ISO_8859-4:1988',
449
      'L4',
450
      'LATIN4',
451
      'CSISOLATIN4',
452
      'CYRILLIC',
453
      'ISO-8859-5',
454
      'ISO-IR-144',
455
      'ISO8859-5',
456
      'ISO_8859-5',
457
      'ISO_8859-5:1988',
458
      'CSISOLATINCYRILLIC',
459
      'ARABIC',
460
      'ASMO-708',
461
      'ECMA-114',
462
      'ISO-8859-6',
463
      'ISO-IR-127',
464
      'ISO8859-6',
465
      'ISO_8859-6',
466
      'ISO_8859-6:1987',
467
      'CSISOLATINARABIC',
468
      'ECMA-118',
469
      'ELOT_928',
470
      'GREEK',
471
      'GREEK8',
472
      'ISO-8859-7',
473
      'ISO-IR-126',
474
      'ISO8859-7',
475
      'ISO_8859-7',
476
      'ISO_8859-7:1987',
477
      'ISO_8859-7:2003',
478
      'CSISOLATINGREEK',
479
      'HEBREW',
480
      'ISO-8859-8',
481
      'ISO-IR-138',
482
      'ISO8859-8',
483
      'ISO_8859-8',
484
      'ISO_8859-8:1988',
485
      'CSISOLATINHEBREW',
486
      'ISO-8859-9',
487
      'ISO-IR-148',
488
      'ISO8859-9',
489
      'ISO_8859-9',
490
      'ISO_8859-9:1989',
491
      'L5',
492
      'LATIN5',
493
      'CSISOLATIN5',
494
      'ISO-8859-10',
495
      'ISO-IR-157',
496
      'ISO8859-10',
497
      'ISO_8859-10',
498
      'ISO_8859-10:1992',
499
      'L6',
500
      'LATIN6',
501
      'CSISOLATIN6',
502
      'ISO-8859-11',
503
      'ISO8859-11',
504
      'ISO_8859-11',
505
      'ISO-8859-13',
506
      'ISO-IR-179',
507
      'ISO8859-13',
508
      'ISO_8859-13',
509
      'L7',
510
      'LATIN7',
511
      'ISO-8859-14',
512
      'ISO-CELTIC',
513
      'ISO-IR-199',
514
      'ISO8859-14',
515
      'ISO_8859-14',
516
      'ISO_8859-14:1998',
517
      'L8',
518
      'LATIN8',
519
      'ISO-8859-15',
520
      'ISO-IR-203',
521
      'ISO8859-15',
522
      'ISO_8859-15',
523
      'ISO_8859-15:1998',
524
      'LATIN-9',
525
      'ISO-8859-16',
526
      'ISO-IR-226',
527
      'ISO8859-16',
528
      'ISO_8859-16',
529
      'ISO_8859-16:2001',
530
      'L10',
531
      'LATIN10',
532
      'KOI8-R',
533
      'CSKOI8R',
534
      'KOI8-U',
535
      'KOI8-RU',
536
      'CP1250',
537
      'MS-EE',
538
      'WINDOWS-1250',
539
      'CP1251',
540
      'MS-CYRL',
541
      'WINDOWS-1251',
542
      'CP1252',
543
      'MS-ANSI',
544
      'WINDOWS-1252',
545
      'CP1253',
546
      'MS-GREEK',
547
      'WINDOWS-1253',
548
      'CP1254',
549
      'MS-TURK',
550
      'WINDOWS-1254',
551
      'CP1255',
552
      'MS-HEBR',
553
      'WINDOWS-1255',
554
      'CP1256',
555
      'MS-ARAB',
556
      'WINDOWS-1256',
557
      'CP1257',
558
      'WINBALTRIM',
559
      'WINDOWS-1257',
560
      'CP1258',
561
      'WINDOWS-1258',
562
      '850',
563
      'CP850',
564
      'IBM850',
565
      'CSPC850MULTILINGUAL',
566
      '862',
567
      'CP862',
568
      'IBM862',
569
      'CSPC862LATINHEBREW',
570
      '866',
571
      'CP866',
572
      'IBM866',
573
      'CSIBM866',
574
      'MAC',
575
      'MACINTOSH',
576
      'MACROMAN',
577
      'CSMACINTOSH',
578
      'MACCENTRALEUROPE',
579
      'MACICELAND',
580
      'MACCROATIAN',
581
      'MACROMANIA',
582
      'MACCYRILLIC',
583
      'MACUKRAINE',
584
      'MACGREEK',
585
      'MACTURKISH',
586
      'MACHEBREW',
587
      'MACARABIC',
588
      'MACTHAI',
589
      'HP-ROMAN8',
590
      'R8',
591
      'ROMAN8',
592
      'CSHPROMAN8',
593
      'NEXTSTEP',
594
      'ARMSCII-8',
595
      'GEORGIAN-ACADEMY',
596
      'GEORGIAN-PS',
597
      'KOI8-T',
598
      'CP154',
599
      'CYRILLIC-ASIAN',
600
      'PT154',
601
      'PTCP154',
602
      'CSPTCP154',
603
      'KZ-1048',
604
      'RK1048',
605
      'STRK1048-2002',
606
      'CSKZ1048',
607
      'MULELAO-1',
608
      'CP1133',
609
      'IBM-CP1133',
610
      'ISO-IR-166',
611
      'TIS-620',
612
      'TIS620',
613
      'TIS620-0',
614
      'TIS620.2529-1',
615
      'TIS620.2533-0',
616
      'TIS620.2533-1',
617
      'CP874',
618
      'WINDOWS-874',
619
      'VISCII',
620
      'VISCII1.1-1',
621
      'CSVISCII',
622
      'TCVN',
623
      'TCVN-5712',
624
      'TCVN5712-1',
625
      'TCVN5712-1:1993',
626
      'ISO-IR-14',
627
      'ISO646-JP',
628
      'JIS_C6220-1969-RO',
629
      'JP',
630
      'CSISO14JISC6220RO',
631
      'JISX0201-1976',
632
      'JIS_X0201',
633
      'X0201',
634
      'CSHALFWIDTHKATAKANA',
635
      'ISO-IR-87',
636
      'JIS0208',
637
      'JIS_C6226-1983',
638
      'JIS_X0208',
639
      'JIS_X0208-1983',
640
      'JIS_X0208-1990',
641
      'X0208',
642
      'CSISO87JISX0208',
643
      'ISO-IR-159',
644
      'JIS_X0212',
645
      'JIS_X0212-1990',
646
      'JIS_X0212.1990-0',
647
      'X0212',
648
      'CSISO159JISX02121990',
649
      'CN',
650
      'GB_1988-80',
651
      'ISO-IR-57',
652
      'ISO646-CN',
653
      'CSISO57GB1988',
654
      'CHINESE',
655
      'GB_2312-80',
656
      'ISO-IR-58',
657
      'CSISO58GB231280',
658
      'CN-GB-ISOIR165',
659
      'ISO-IR-165',
660
      'ISO-IR-149',
661
      'KOREAN',
662
      'KSC_5601',
663
      'KS_C_5601-1987',
664
      'KS_C_5601-1989',
665
      'CSKSC56011987',
666
      'EUC-JP',
667
      'EUCJP',
668
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
669
      'CSEUCPKDFMTJAPANESE',
670
      'MS_KANJI',
671
      'SHIFT-JIS',
672
      'SHIFT_JIS',
673
      'SJIS',
674
      'CSSHIFTJIS',
675
      'CP932',
676
      'ISO-2022-JP',
677
      'CSISO2022JP',
678
      'ISO-2022-JP-1',
679
      'ISO-2022-JP-2',
680
      'CSISO2022JP2',
681
      'CN-GB',
682
      'EUC-CN',
683
      'EUCCN',
684
      'GB2312',
685
      'CSGB2312',
686
      'GBK',
687
      'CP936',
688
      'MS936',
689
      'WINDOWS-936',
690
      'GB18030',
691
      'ISO-2022-CN',
692
      'CSISO2022CN',
693
      'ISO-2022-CN-EXT',
694
      'HZ',
695
      'HZ-GB-2312',
696
      'EUC-TW',
697
      'EUCTW',
698
      'CSEUCTW',
699
      'BIG-5',
700
      'BIG-FIVE',
701
      'BIG5',
702
      'BIGFIVE',
703
      'CN-BIG5',
704
      'CSBIG5',
705
      'CP950',
706
      'BIG5-HKSCS:1999',
707
      'BIG5-HKSCS:2001',
708
      'BIG5-HKSCS',
709
      'BIG5-HKSCS:2004',
710
      'BIG5HKSCS',
711
      'EUC-KR',
712
      'EUCKR',
713
      'CSEUCKR',
714
      'CP949',
715
      'UHC',
716
      'CP1361',
717
      'JOHAB',
718
      'ISO-2022-KR',
719
      'CSISO2022KR',
720
      'CP856',
721
      'CP922',
722
      'CP943',
723
      'CP1046',
724
      'CP1124',
725
      'CP1129',
726
      'CP1161',
727
      'IBM-1161',
728
      'IBM1161',
729
      'CSIBM1161',
730
      'CP1162',
731
      'IBM-1162',
732
      'IBM1162',
733
      'CSIBM1162',
734
      'CP1163',
735
      'IBM-1163',
736
      'IBM1163',
737
      'CSIBM1163',
738
      'DEC-KANJI',
739
      'DEC-HANYU',
740
      '437',
741
      'CP437',
742
      'IBM437',
743
      'CSPC8CODEPAGE437',
744
      'CP737',
745
      'CP775',
746
      'IBM775',
747
      'CSPC775BALTIC',
748
      '852',
749
      'CP852',
750
      'IBM852',
751
      'CSPCP852',
752
      'CP853',
753
      '855',
754
      'CP855',
755
      'IBM855',
756
      'CSIBM855',
757
      '857',
758
      'CP857',
759
      'IBM857',
760
      'CSIBM857',
761
      'CP858',
762
      '860',
763
      'CP860',
764
      'IBM860',
765
      'CSIBM860',
766
      '861',
767
      'CP-IS',
768
      'CP861',
769
      'IBM861',
770
      'CSIBM861',
771
      '863',
772
      'CP863',
773
      'IBM863',
774
      'CSIBM863',
775
      'CP864',
776
      'IBM864',
777
      'CSIBM864',
778
      '865',
779
      'CP865',
780
      'IBM865',
781
      'CSIBM865',
782
      '869',
783
      'CP-GR',
784
      'CP869',
785
      'IBM869',
786
      'CSIBM869',
787
      'CP1125',
788
      'EUC-JISX0213',
789
      'SHIFT_JISX0213',
790
      'ISO-2022-JP-3',
791
      'BIG5-2003',
792
      'ISO-IR-230',
793
      'TDS565',
794
      'ATARI',
795
      'ATARIST',
796
      'RISCOS-LATIN1',
797
  );
798
799
  /**
800
   * @var array
801
   */
802
  private static $support = array();
803
804
  /**
   * Creates a UTF8 helper instance.
   *
   * Construction does nothing except trigger the environment detection
   * implemented in UTF8::checkForSupport().
   */
  public function __construct()
  {
    // the detection routine itself decides whether any work is still required
    self::checkForSupport();
  }
811
812
  /**
   * Fetches one character of a UTF-8 string by its position, comparable to
   * what $str[$pos] offers for single-byte strings.
   *
   * @param string $str <p>The UTF-8 string to read from.</p>
   * @param int    $pos <p>Position of the wanted character, handed unchanged to UTF8::substr().</p>
   *
   * @return string <p>The slice of length one that UTF8::substr() produced.</p>
   */
  public static function access($str, $pos)
  {
    // exactly one character is requested, starting at $pos
    $length = 1;
    $character = self::substr($str, $pos, $length);

    return $character;
  }
824
825
  /**
   * Returns the given string with the UTF-8 BOM in front of it.
   *
   * INFO: The string is returned untouched unless UTF8::string_has_bom()
   *       answers with a strict "false" for it.
   *
   * @param string $str <p>The string that should start with a BOM.</p>
   *
   * @return string <p>The input, prefixed with UTF8::bom() when no BOM was detected.</p>
   */
  public static function add_bom_to_string($str)
  {
    // only the strict boolean "false" counts as "no BOM found"
    $needsBom = (self::string_has_bom($str) === false);

    return $needsBom ? self::bom() . $str : $str;
  }
842
843
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it describes.
   *
   * The digits are read as one big-endian bit sequence. When the number of digits
   * is not a multiple of eight, the sequence is padded with zero bits on the left,
   * so "1100001" and "01100001" both produce "a".
   *
   * Every character other than "0" and "1" is ignored, so the digits may be
   * grouped with separators, e.g. "01100001 01100010".
   *
   * @param mixed $bin 1|0
   *
   * @return string <p>The decoded bytes, or an empty string when the input contains no binary digit.</p>
   */
  public static function binary_to_str($bin)
  {
    // keep only the binary digits
    $bits = (string)preg_replace('/[^01]/', '', (string)$bin);
    if ($bits === '') {
      return '';
    }

    // left-pad to whole bytes; converting byte by byte keeps leading zero bits
    // and has no size limit, unlike a single base_convert() over the whole
    // input (which also made pack('H*') pad an odd hex digit on the wrong side)
    $missingBits = (8 - strlen($bits) % 8) % 8;
    $bits = str_repeat('0', $missingBits) . $bits;

    $str = '';
    foreach (str_split($bits, 8) as $byte) {
      $str .= chr(bindec($byte));
    }

    return $str;
  }
854
855
  /**
   * Provides the byte sequence of the UTF-8 Byte Order Mark.
   *
   * @return string <p>The three bytes 0xEF 0xBB 0xBF.</p>
   */
  public static function bom()
  {
    $utf8Bom = "\xEF\xBB\xBF";

    return $utf8Bom;
  }
864
865
  /**
   * Alias of UTF8::chr_map(): both arguments are forwarded unchanged.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback handed on to UTF8::chr_map().</p>
   * @param string       $str      <p>The string handed on to UTF8::chr_map().</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
878
879
  /**
   * Detects once which UTF-8 related features the server offers and remembers
   * the results in self::$support.
   *
   * The entries "mbstring", "iconv", "intl", "intlChar" and "pcre_utf8" are
   * filled from the matching helper methods. The marker entry
   * "already_checked_via_portable_utf8" turns every later call into a no-op.
   *
   * INFO: There is no need to call this by hand, the methods that depend on it trigger it themselves.
   */
  public static function checkForSupport()
  {
    // a previous call has already written the marker -> nothing left to do
    if (isset(self::$support['already_checked_via_portable_utf8'])) {
      return;
    }

    // the marker is written first, before any of the probes run
    self::$support['already_checked_via_portable_utf8'] = true;

    self::$support['mbstring'] = self::mbstring_loaded();
    self::$support['iconv'] = self::iconv_loaded();
    self::$support['intl'] = self::intl_loaded();
    self::$support['intlChar'] = self::intlChar_loaded();
    self::$support['pcre_utf8'] = self::pcre_utf8_support();
  }
897
898
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * @param int    $code_point <p>The code point for which to generate a character. Must be a real
   *                           integer, numeric strings and floats are rejected.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure to encode
   *                     (non-integer input or a value outside of 0 - 0x10FFFF).</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // strict check: only values that already are integers pass
    if ((int)$code_point !== $code_point) {
      return null;
    }

    // Unicode ends at U+10FFFF. Rejecting everything else keeps the manual encoder
    // below from emitting invalid byte sequences, and makes it return null for
    // out-of-range values the way \IntlChar::chr() is documented to.
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    } elseif (self::$support['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // use static cache, if there is no support for "IntlChar"
    static $cache = array();
    $cacheKey = $code_point . $encoding;
    if (isset($cache[$cacheKey]) === true) {
      return $cache[$cacheKey];
    }

    if (0x80 > $code_point) {
      // 1 byte: 0xxxxxxx
      $str = chr($code_point);
    } elseif (0x800 > $code_point) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = chr(0xC0 | $code_point >> 6) .
             chr(0x80 | $code_point & 0x3F);
    } elseif (0x10000 > $code_point) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = chr(0xE0 | $code_point >> 12) .
             chr(0x80 | $code_point >> 6 & 0x3F) .
             chr(0x80 | $code_point & 0x3F);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = chr(0xF0 | $code_point >> 18) .
             chr(0x80 | $code_point >> 12 & 0x3F) .
             chr(0x80 | $code_point >> 6 & 0x3F) .
             chr(0x80 | $code_point & 0x3F);
    }

    // the bytes above are UTF-8, convert them if another target encoding was requested
    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $cache[$cacheKey] = $str;

    return $str;
  }
957
958
  /**
   * Runs a callback over every element that UTF8::split() returns for the string.
   *
   * @param string|array $callback <p>The callback handed to array_map().</p>
   * @param string       $str      <p>UTF-8 string that is passed to UTF8::split() first.</p>
   *
   * @return array <p>The array_map() result: one callback return value per element.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
972
973
  /**
   * Generates an array of byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character, empty for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    // compare against the empty string explicitly: a plain truthiness check
    // would also discard the valid one-character input "0"
    if ((string)$str === '') {
      return array();
    }

    // strlen() counts bytes, so every element from split() is mapped to its byte length
    return array_map('strlen', self::split($str));
  }
993
994
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are read (1 - 4). The payload bits of
   * the lead byte and of the following bytes are then combined into one integer.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The integer assembled from the bytes of the character.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // isset() guards every byte access: an empty or truncated input is read as ''
    // (the value an out-of-range offset yields anyway) without raising
    // "Uninitialized string offset"
    $code = self::ord(isset($char[0]) ? $char[0] : '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      $byte = isset($char[$i - 1]) ? $char[$i - 1] : '';
      $code = ($code << 6) + (self::ord($byte) & ~0x80);
    }

    return $code;
  }
1033
1034
  /**
1035
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1036
   *
1037
   * @param string $char <p>The input character</p>
1038
   * @param string $pfix [optional]
1039 1
   *
1040
   * @return string <p>The code point encoded as U+xxxx</p>
1041 1
   */
1042
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    // Resolve the character to its numeric code first, then let int_to_hex()
    // render it with the requested prefix.
    $codePoint = self::ord($char);

    return self::int_to_hex($codePoint, $pfix);
  }
1046
1047
  /**
1048
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1049
   *
1050
   * @param string $body     <p>The original string to be split.</p>
1051
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1052
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1053 1
   *
1054
   * @return string <p>The chunked string</p>
1055 1
   */
1056
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    // Cut the body into pieces via self::split(), passing $chunklen as the
    // piece length.
    $chunks = self::split($body, $chunklen);

    // The separator is only placed *between* chunks (implode), so nothing is
    // appended after the final chunk.
    return implode($end, $chunks);
  }
1060
1061
  /**
1062
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1063
   *
1064
   * @param string $str                     <p>The string to be sanitized.</p>
1065
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1066
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1067
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1068
   *                                        => "..."</p>
1069
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1070
   *                                        $normalize_whitespace</p>
1071 44
   *
1072
   * @return string <p>Clean UTF-8 encoded string.</p>
1073
   */
1074
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; stray bytes land in
    // groups 2 / 3 and are dropped because only "$1" is kept on replacement.
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($validSequences, '$1', $str);

    // Always applied: drop replacement characters, then invisible characters.
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    // Optional steps, each enabled only by a strict boolean true.
    if (true === $normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if (true === $remove_bom) {
      $str = self::removeBOM($str);
    }

    return $str;
  }
1109
1110
  /**
1111
   * Clean up a string so that only printable UTF-8 chars remain + fix its UTF-8 encoding.
1112
   *
1113
   * @param string $str <p>The input string.</p>
1114 4
   *
1115
   * @return string
1116 4
   */
1117
  public static function cleanup($str)
  {
    $str = (string)$str;

    // Nothing to clean in an empty string.
    if ($str === '') {
      return '';
    }

    // Step 1: repair ISO <-> UTF-8 encoding errors.
    $repaired = self::fix_simple_utf8($str);

    // Step 2: run clean() with these options:
    //   - remove all non UTF-8 symbols
    //   - remove diamond question mark (�)
    //   - remove invisible characters (e.g. "\0")
    //   - remove BOM
    //   - normalize whitespace chars (but keep non-breaking-spaces)
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
1137
1138
  /**
1139
   * Accepts a string or an array of strings and returns an array of Unicode code points.
1140
   *
1141
   * INFO: opposite to UTF8::string()
1142
   *
1143
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1144
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1145
   *                                    default, code points will be returned as integers.</p>
1146 5
   *
1147
   * @return array <p>The array of code points.</p>
1148 5
   */
1149 5
  public static function codepoints($arg, $u_style = false)
  {
    // Callables pointing at this class' own static helpers.
    $toOrd = array('\\voku\\helper\\UTF8', 'ord');
    $toHex = array('\\voku\\helper\\UTF8', 'int_to_hex');

    // A plain string is first broken into its characters; an array is used
    // as given.
    $chars = is_string($arg) ? self::split($arg) : $arg;

    $points = array_map($toOrd, $chars);

    // Optionally render every code point through int_to_hex() (called with
    // its default prefix).
    if ($u_style) {
      $points = array_map($toHex, $points);
    }

    return $points;
  }
1175
1176
  /**
1177
   * Returns count of characters used in a string.
1178
   *
1179
   * @param string $str       <p>The input string.</p>
1180
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
1181
   *
1182 6
   * @return array <p>An associative array of Character as keys and
1183
   *               their count as values.</p>
1184 6
   */
1185
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // Split with a piece length of 1, forwarding the clean-up flag.
    $characters = self::split($str, 1, $cleanUtf8);

    // Tally how often each distinct character occurs.
    return array_count_values($characters);
  }
1189
1190
  /**
1191
   * Get a UTF-8 character from its decimal code representation.
1192
   *
1193
   * @param int $code
1194 1
   *
1195
   * @return string
1196 1
   */
1197 1
  public static function decimal_to_chr($code)
  {
    // Express the code as a hexadecimal numeric character reference ...
    $entity = '&#x' . dechex($code) . ';';

    // ... and let mbstring decode that reference into its UTF-8 bytes.
    return \mb_convert_encoding($entity, 'UTF-8', 'HTML-ENTITIES');
  }
1205
1206
  /**
1207
   * Encode a string with a new charset-encoding.
1208
   *
1209
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1210
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
1211
   *
1212
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1213
   * @param string $str      <p>The input string</p>
1214
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1215
   *                         /> otherwise we auto-detect the current string-encoding</p>
1216 11
   *
1217
   * @return string
1218 11
   */
1219 11
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // Nothing to do for an empty string or an empty target encoding.
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $encodingDetected = self::str_detect_encoding($str);

    // Detection failed: test the false / empty cases explicitly instead of
    // relying on loose truthiness.
    if (!is_string($encodingDetected) || $encodingDetected === '') {
      return $str;
    }

    // Without $force, a string already detected as the target encoding is
    // returned untouched.
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // Target UTF-8: delegate to to_utf8() when forced or when the detected
    // source encoding is one of the three listed below.
    if (
        $encoding === 'UTF-8'
        &&
        (
            $force === true
            || $encodingDetected === 'UTF-8'
            || $encodingDetected === 'WINDOWS-1252'
            || $encodingDetected === 'ISO-8859-1'
        )
    ) {
      return self::to_utf8($str);
    }

    // Target ISO-8859-1: delegate to to_iso8859() under the same rule.
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $force === true
            || $encodingDetected === 'ISO-8859-1'
            || $encodingDetected === 'UTF-8'
        )
    ) {
      return self::to_iso8859($str);
    }

    // Every other combination goes through mbstring.
    $strEncoded = \mb_convert_encoding(
        $str,
        $encoding,
        $encodingDetected
    );

    // Compare explicitly: a loose truthiness test would discard a valid
    // conversion result of "0" and hand back the unconverted input.
    if ($strEncoded !== false && $strEncoded !== '') {
      return $strEncoded;
    }

    return $str;
  }
1282
1283
  /**
1284
   * Reads entire file into a string.
1285
   *
1286
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1287
   *
1288
   * @link http://php.net/manual/en/function.file-get-contents.php
1289
   *
1290
   * @param string        $filename      <p>
1291
   *                                     Name of the file to read.
1292
   *                                     </p>
1293
   * @param int|null      $flags         [optional] <p>
1294
   *                                     Prior to PHP 6, this parameter is called
1295
   *                                     use_include_path and is a bool.
1296
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1297
   *                                     to trigger include path
1298
   *                                     search.
1299
   *                                     </p>
1300
   *                                     <p>
1301
   *                                     The value of flags can be any combination of
1302
   *                                     the following flags (with some restrictions), joined with the
1303
   *                                     binary OR (|)
1304
   *                                     operator.
1305
   *                                     </p>
1306
   *                                     <p>
1307
   *                                     <table>
1308
   *                                     Available flags
1309
   *                                     <tr valign="top">
1310
   *                                     <td>Flag</td>
1311
   *                                     <td>Description</td>
1312
   *                                     </tr>
1313
   *                                     <tr valign="top">
1314
   *                                     <td>
1315
   *                                     FILE_USE_INCLUDE_PATH
1316
   *                                     </td>
1317
   *                                     <td>
1318
   *                                     Search for filename in the include directory.
1319
   *                                     See include_path for more
1320
   *                                     information.
1321
   *                                     </td>
1322
   *                                     </tr>
1323
   *                                     <tr valign="top">
1324
   *                                     <td>
1325
   *                                     FILE_TEXT
1326
   *                                     </td>
1327
   *                                     <td>
1328
   *                                     As of PHP 6, the default encoding of the read
1329
   *                                     data is UTF-8. You can specify a different encoding by creating a
1330
   *                                     custom context or by changing the default using
1331
   *                                     stream_default_encoding. This flag cannot be
1332
   *                                     used with FILE_BINARY.
1333
   *                                     </td>
1334
   *                                     </tr>
1335
   *                                     <tr valign="top">
1336
   *                                     <td>
1337
   *                                     FILE_BINARY
1338
   *                                     </td>
1339
   *                                     <td>
1340
   *                                     With this flag, the file is read in binary mode. This is the default
1341
   *                                     setting and cannot be used with FILE_TEXT.
1342
   *                                     </td>
1343
   *                                     </tr>
1344
   *                                     </table>
1345
   *                                     </p>
1346
   * @param resource|null $context       [optional] <p>
1347
   *                                     A valid context resource created with
1348
   *                                     stream_context_create. If you don't need to use a
1349
   *                                     custom context, you can skip this parameter by &null;.
1350
   *                                     </p>
1351
   * @param int|null      $offset        [optional] <p>
1352
   *                                     The offset where the reading starts.
1353
   *                                     </p>
1354
   * @param int|null      $maxlen        [optional] <p>
1355
   *                                     Maximum length of data read. The default is to read until end
1356
   *                                     of file is reached.
1357
   *                                     </p>
1358
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1359
   *
1360
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1361
   *                                     or pdf, because they used non default utf-8 chars</p>
1362 2
   *
1363
   * @return string|false <p>The function returns the read data or false on failure.</p>
1364
   */
1365 2
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxlen = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() returns false when the filter fails; there is no usable
    // path in that case, so report failure like an unsuccessful read.
    if ($filename === false) {
      return false;
    }

    // Apply the timeout through an "http" stream context, but only when the
    // caller did not bring a context of their own.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // The native function expects a bool and an int here; cast the null
    // defaults explicitly, since passing null to these parameters is
    // deprecated as of PHP 8.1. The casts mirror PHP's own coercion.
    $useIncludePath = (bool)$flags;
    $offset = (int)$offset;

    // Only forward $maxlen when it is a real integer.
    if (is_int($maxlen)) {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset, $maxlen);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // Re-encode to UTF-8 (non-forced, i.e. based on the detected encoding)
      // and then run the generic clean-up on the result.
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1400
1401
  /**
1402
   * Checks if a file starts with BOM (Byte Order Mark) character.
1403
   *
1404
   * @param string $file_path <p>Path to a valid file.</p>
1405 1
   *
1406
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1407 1
   */
1408
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // A file that could not be read cannot be reported as starting with a
    // BOM; avoid handing boolean false to the string check.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1412
1413
  /**
1414
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1415
   *
1416
   * @param mixed  $var
1417
   * @param int    $normalization_form
1418
   * @param string $leading_combining
1419 9
   *
1420
   * @return mixed
1421 9
   */
1422 9
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    switch (gettype($var)) {
      case 'array':
        // Recurse into every element, keeping the keys.
        foreach ($var as $key => $value) {
          /** @noinspection AlterInForeachInspection */
          $var[$key] = self::filter($value, $normalization_form, $leading_combining);
        }
        break;

      case 'object':
        // Recurse into every iterable property.
        foreach ($var as $property => $value) {
          $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
        }
        break;

      case 'string':
        if (false !== strpos($var, "\r")) {
          // Workaround https://bugs.php.net/65732
          $var = str_replace(array("\r\n", "\r"), "\n", $var);
        }

        // Strings reported as ASCII by is_ascii() get no further treatment.
        if (self::is_ascii($var) !== false) {
          break;
        }

        /** @noinspection PhpUndefinedClassInspection */
        if (\Normalizer::isNormalized($var, $normalization_form)) {
          // Already in the requested form; any non-empty marker will do.
          $normalized = '-';
        } else {
          /** @noinspection PhpUndefinedClassInspection */
          $normalized = \Normalizer::normalize($var, $normalization_form);

          // Use the normalized text when there is any; otherwise fall back
          // to re-encoding the input as UTF-8.
          $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
        }

        if (
            $var[0] >= "\x80" && isset($normalized[0], $leading_combining[0])
            &&
            preg_match('/^\p{Mn}/u', $var)
        ) {
          // Prevent leading combining chars
          // for NFC-safe concatenations.
          $var = $leading_combining . $var;
        }
        break;
    }

    return $var;
  }
1474
1475
  /**
1476
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1477
   *
1478
   * @param int    $type
1479
   * @param string $var
1480
   * @param int    $filter
1481
   * @param mixed  $option
1482
   *
1483
   * @return mixed
1484
   */
1485 View Code Duplication
  public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only when the caller actually supplied a 4th argument.
    $value = (func_num_args() < 4)
        ? filter_input($type, $var, $filter)
        : filter_input($type, $var, $filter, $option);

    return self::filter($value);
  }
1495
1496
  /**
1497
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1498
   *
1499
   * @param int   $type
1500
   * @param mixed $definition
1501
   * @param bool  $add_empty
1502
   *
1503
   * @return mixed
1504
   */
1505 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With a single argument, call the native function with $type alone;
    // otherwise pass the definition and the add-empty flag along.
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1515
1516
  /**
1517
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1518
   *
1519
   * @param mixed $var
1520 1
   * @param int   $filter
1521
   * @param mixed $option
1522 1
   *
1523 1
   * @return mixed
1524 1
   */
1525 1 View Code Duplication
  public static function filter_var($var, $filter = FILTER_DEFAULT, $option = null)
  {
    // Forward $option only when the caller actually supplied a 3rd argument.
    $value = (func_num_args() < 3)
        ? filter_var($var, $filter)
        : filter_var($var, $filter, $option);

    return self::filter($value);
  }
1535
1536
  /**
1537
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1538
   *
1539
   * @param array $data
1540 1
   * @param mixed $definition
1541
   * @param bool  $add_empty
1542 1
   *
1543 1
   * @return mixed
1544 1
   */
1545 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument, call the native function with $data alone;
    // otherwise pass the definition and the add-empty flag along.
    $values = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($values);
  }
1555
1556
  /**
1557
   * Check if the number of unicode characters is not more than the specified integer.
1558
   *
1559 1
   * @param string $str      The original string to be checked.
1560
   * @param int    $box_size The size in number of chars to be checked against string.
1561 1
   *
1562
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1563
   */
1564
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by this class' own strlen().
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1568
1569
  /**
1570
   * Try to fix simple broken UTF-8 strings.
1571
   *
1572
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1573
   *
1574
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1575
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1576
   * See: http://en.wikipedia.org/wiki/Windows-1252
1577 7
   *
1578
   * @param string $str <p>The input string</p>
1579 7
   *
1580 7
   * @return string
1581
   */
1582 7 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // Search / replacement lists derived once from self::$brokenUtf8ToUtf8
    // and kept for the lifetime of the request.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if (null === $brokenSequences) {
      $brokenSequences = array_keys(self::$brokenUtf8ToUtf8);
      $fixedSequences = array_values(self::$brokenUtf8ToUtf8);
    }

    // Swap every known broken byte sequence for its mapped replacement.
    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1601
1602
  /**
1603 1
   * Fix a double (or multiple) encoded UTF8 string.
1604
   *
1605 1
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1606
   *
1607 1
   * @return mixed
1608
   */
1609
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element; array_map() keeps the keys when
    // it is given a single array.
    if (is_array($str)) {
      return array_map(array('\\voku\\helper\\UTF8', 'fix_utf8'), $str);
    }

    // Keep decoding and re-encoding until a pass no longer changes the
    // value, i.e. no further encoding layer is peeled off.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    }

    return $str;
  }
1631
1632 1
  /**
   * Get the text direction of a specific character.
   *
   * If "IntlChar" support was detected, its result is used first; whenever
   * that result is in neither of the two lists below, the built-in
   * code point table is consulted instead.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $direction = \IntlChar::charDirection($char);

      // direction values taken from the "IntlChar"-Class
      $ltrValues = array(0, 11, 12, 20);
      $rtlValues = array(1, 13, 14, 15, 21);

      if (in_array($direction, $ltrValues, true)) {
        return 'LTR';
      }

      if (in_array($direction, $rtlValues, true)) {
        return 'RTL';
      }
    }

    $c = self::chr_to_decimal($char);

    // every code point outside of 0x5be - 0x10b7f is treated as left-to-right
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // lower block: 0x5be - 0x85e
      $isRtl = (
          0x5be === $c ||
          0x5c0 === $c ||
          0x5c3 === $c ||
          0x5c6 === $c ||
          (0x5d0 <= $c && 0x5ea >= $c) ||
          (0x5f0 <= $c && 0x5f4 >= $c) ||
          0x608 === $c ||
          0x60b === $c ||
          0x60d === $c ||
          0x61b === $c ||
          (0x61e <= $c && 0x64a >= $c) ||
          (0x66d <= $c && 0x66f >= $c) ||
          (0x671 <= $c && 0x6d5 >= $c) ||
          (0x6e5 <= $c && 0x6e6 >= $c) ||
          (0x6ee <= $c && 0x6ef >= $c) ||
          (0x6fa <= $c && 0x70d >= $c) ||
          0x710 === $c ||
          (0x712 <= $c && 0x72f >= $c) ||
          (0x74d <= $c && 0x7a5 >= $c) ||
          0x7b1 === $c ||
          (0x7c0 <= $c && 0x7ea >= $c) ||
          (0x7f4 <= $c && 0x7f5 >= $c) ||
          0x7fa === $c ||
          (0x800 <= $c && 0x815 >= $c) ||
          0x81a === $c ||
          0x824 === $c ||
          0x828 === $c ||
          (0x830 <= $c && 0x83e >= $c) ||
          (0x840 <= $c && 0x858 >= $c) ||
          0x85e === $c
      );

    } elseif (0x200f === $c) {

      $isRtl = true;

    } elseif (0xfb1d <= $c) {

      // upper block: 0xfb1d - 0x10b7f
      $isRtl = (
          0xfb1d === $c ||
          (0xfb1f <= $c && 0xfb28 >= $c) ||
          (0xfb2a <= $c && 0xfb36 >= $c) ||
          (0xfb38 <= $c && 0xfb3c >= $c) ||
          0xfb3e === $c ||
          (0xfb40 <= $c && 0xfb41 >= $c) ||
          (0xfb43 <= $c && 0xfb44 >= $c) ||
          (0xfb46 <= $c && 0xfbc1 >= $c) ||
          (0xfbd3 <= $c && 0xfd3d >= $c) ||
          (0xfd50 <= $c && 0xfd8f >= $c) ||
          (0xfd92 <= $c && 0xfdc7 >= $c) ||
          (0xfdf0 <= $c && 0xfdfc >= $c) ||
          (0xfe70 <= $c && 0xfe74 >= $c) ||
          (0xfe76 <= $c && 0xfefc >= $c) ||
          (0x10800 <= $c && 0x10805 >= $c) ||
          0x10808 === $c ||
          (0x1080a <= $c && 0x10835 >= $c) ||
          (0x10837 <= $c && 0x10838 >= $c) ||
          0x1083c === $c ||
          (0x1083f <= $c && 0x10855 >= $c) ||
          (0x10857 <= $c && 0x1085f >= $c) ||
          (0x10900 <= $c && 0x1091b >= $c) ||
          (0x10920 <= $c && 0x10939 >= $c) ||
          0x1093f === $c ||
          0x10a00 === $c ||
          (0x10a10 <= $c && 0x10a13 >= $c) ||
          (0x10a15 <= $c && 0x10a17 >= $c) ||
          (0x10a19 <= $c && 0x10a33 >= $c) ||
          (0x10a40 <= $c && 0x10a47 >= $c) ||
          (0x10a50 <= $c && 0x10a58 >= $c) ||
          (0x10a60 <= $c && 0x10a7f >= $c) ||
          (0x10b00 <= $c && 0x10b35 >= $c) ||
          (0x10b40 <= $c && 0x10b55 >= $c) ||
          (0x10b58 <= $c && 0x10b72 >= $c) ||
          (0x10b78 <= $c && 0x10b7f >= $c)
      );

    } else {

      // gap between the two blocks (except 0x200f, handled above)
      $isRtl = false;

    }

    return $isRtl ? 'RTL' : 'LTR';
  }
1751
1752 1
  /**
   * get data from "/data/*.php"
   *
   * The given name is resolved to "<directory of this class>/data/<name>.php"
   * and the value returned by that PHP file is handed back to the caller.
   *
   * @param string $file <p>The file name, without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
1769
1770
  /**
   * Alias of "UTF8::string_has_bom()": the argument is passed through
   * unchanged and the result is returned as-is.
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of UTF8::string_has_bom().</p>
   */
  public static function hasBom($str)
  {
    return self::string_has_bom($str);
  }
1783
1784
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with
   * "U+" or "\u" (case-insensitive), e.g. "U+00f1", "\u00F1" or "1f600".
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $str <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($str)
  {
    // empty input (and the string "0") can never be a 4-6 digit code point
    if (!$str) {
      return false;
    }

    // only real hexadecimal digits are allowed: with "[a-z0-9]" a value like
    // "wxyz" was accepted and silently converted to 0 instead of failing
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $str, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
1805
1806
  /**
   * Alias of "UTF8::html_entity_decode()": all arguments are passed through
   * unchanged and the result is returned as-is.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>Forwarded to UTF8::html_entity_decode().</p>
   * @param string $encoding [optional] <p>Forwarded to UTF8::html_entity_decode(), default is UTF-8.</p>
   *
   * @return string <p>The result of UTF8::html_entity_decode().</p>
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    return self::html_entity_decode($str, $flags, $encoding);
  }
1821
1822
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * If "mb_encode_numericentity()" is available it does the conversion,
   * otherwise every character is encoded via "UTF8::single_chr_html_encode()".
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets converted: start after the ASCII block
      // (0x00 - 0x7f) when ASCII chars should be kept
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      if ($encoding !== 'UTF-8') {
        $encoding = self::normalize_encoding($encoding);
      }

      // convmap: array(start, end, offset, mask)
      //
      // The range has to reach up to U+10FFFF (with a 21-bit mask): with an
      // upper bound of 0xffff, characters above U+FFFF (e.g. emoji) were
      // left unencoded.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    // fallback without mbstring: encode char by char
    // (the closure uses "UTF8::" like the rest of the file, "self::" is not
    // available inside closures on old PHP versions)
    return implode(
        array_map(
            function ($data) use ($keepAsciiChars) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars);
            },
            self::split($str)
        )
    );
  }
1863
1864
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so numeric entities get their missing semicolon appended
   * here before they are decoded.
   *
   * Decoding is repeated until the string no longer changes, so multiply
   * encoded entities are resolved as well.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
   *                         which document type to use:<br />
   *                         <b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.<br />
   *                         <b>ENT_QUOTES</b>: Will convert both double and single quotes.<br />
   *                         <b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.<br />
   *                         <b>ENT_HTML401</b>: Handle code as HTML 4.01.<br />
   *                         <b>ENT_XML1</b>: Handle code as XML 1.<br />
   *                         <b>ENT_XHTML</b>: Handle code as XHTML.<br />
   *                         <b>ENT_HTML5</b>: Handle code as HTML 5.<br />
   *                         If null is given, ENT_COMPAT | ENT_HTML5 is used on PHP >= 5.4 and ENT_COMPAT otherwise.
   *                         </p>
   * @param string $encoding [optional] <p>Encoding to use.</p>
   *
   * @return string <p>The decoded string.</p>
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // strings of up to three bytes are returned untouched
    if (!isset($str[3])) {
      return $str;
    }

    // nothing to do without an ampersand ...
    if (strpos($str, '&') === false) {
      return $str;
    }

    // ... or with neither a numeric entity prefix nor a semicolon
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_COMPAT | ENT_HTML5) : ENT_COMPAT;
    }

    // decodes one decimal entity, but leaves entities that would turn into
    // a single or double quote encoded
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,5};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities
      // (numeric entities without a trailing semicolon get one appended first)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,5}(?![0-9a-f;])|(?:0*\d{2,4}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($before !== $str);

    return $str;
  }
1997
1998
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * The string is first passed to PHP's htmlentities(). For UTF-8, every
   * character of three or more bytes that is still present afterwards is
   * additionally replaced by its numbered entity via "UTF8::html_encode()".
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>The input string.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
   *                              invalid code unit sequences and the used document type:<br />
   *                              <b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.<br />
   *                              <b>ENT_QUOTES</b>: Will convert both double and single quotes.<br />
   *                              <b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.<br />
   *                              <b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of returning
   *                              an empty string. Using this flag is discouraged as it may have security
   *                              implications.<br />
   *                              <b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode Replacement
   *                              Character instead of returning an empty string.<br />
   *                              <b>ENT_DISALLOWED</b>: Replace invalid code points for the given document type with a
   *                              Unicode Replacement Character instead of leaving them as is.<br />
   *                              <b>ENT_HTML401</b>: Handle code as HTML 4.01.<br />
   *                              <b>ENT_XML1</b>: Handle code as XML 1.<br />
   *                              <b>ENT_XHTML</b>: Handle code as XHTML.<br />
   *                              <b>ENT_HTML5</b>: Handle code as HTML 5.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Defines the encoding used in conversion. Although this argument is technically
   *                              optional, you are highly encouraged to specify the correct value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The encoded string.<br />
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $counter => $byteLength) {
      // chars with less than three bytes are left to PHP's htmlentities()
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $counter);

      // "UTF8::access()" can return false: there is nothing to encode then,
      // and false must not be used as array key / search string
      if ($char === false) {
        continue;
      }

      // every distinct char is encoded only once
      if (!isset($replacements[$char])) {
        $search[$char] = $char;
        $replacements[$char] = self::html_encode($char);
      }
    }

    return str_replace($search, $replacements, $str);
  }
2129
2130
  /**
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
   *
   * Wrapper around PHP's htmlspecialchars() with UTF-8 as default encoding;
   * any other encoding name is normalized via "UTF8::normalize_encoding()" first.
   *
   * INFO: Take a look at "UTF8::htmlentities()"
   *
   * @link http://php.net/manual/en/function.htmlspecialchars.php
   *
   * @param string $str           <p>The string being converted.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
   *                              invalid code unit sequences and the used document type:<br />
   *                              <b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.<br />
   *                              <b>ENT_QUOTES</b>: Will convert both double and single quotes.<br />
   *                              <b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.<br />
   *                              <b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of returning
   *                              an empty string. Using this flag is discouraged as it may have security
   *                              implications.<br />
   *                              <b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode Replacement
   *                              Character instead of returning an empty string.<br />
   *                              <b>ENT_DISALLOWED</b>: Replace invalid code points for the given document type with a
   *                              Unicode Replacement Character instead of leaving them as is.<br />
   *                              <b>ENT_HTML401</b>: Handle code as HTML 4.01.<br />
   *                              <b>ENT_XML1</b>: Handle code as XML 1.<br />
   *                              <b>ENT_XHTML</b>: Handle code as XHTML.<br />
   *                              <b>ENT_HTML5</b>: Handle code as HTML 5.
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Defines encoding used in conversion.<br />
   *                              For the purposes of this function, the encodings ISO-8859-1, ISO-8859-15, UTF-8,
   *                              cp866, cp1251, cp1252, and KOI8-R are effectively equivalent, provided the
   *                              <i>string</i> itself is valid for the encoding, as the characters affected by
   *                              <b>htmlspecialchars</b> occupy the same positions in all of these encodings.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities, the default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The converted string.<br />
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // 'UTF-8' is used as given, every other name is normalized first
    $targetEncoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding);

    return htmlspecialchars($str, $flags, $targetEncoding, $double_encode);
  }
2247
2248 1
  /**
   * Checks whether the "iconv" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a boolean
    return extension_loaded('iconv');
  }
2257
2258
  /**
   * Converts Integer to hexadecimal U+xxxx code point representation.
   *
   * The hexadecimal number is written in lower case and left-padded with
   * zeros to at least four digits, e.g. 241 => "U+00f1".
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>The prefix of the result, default is "U+".</p>
   *
   * @return string <p>The code point, or empty string on failure.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // only values consisting solely of decimal digits are accepted
    if (!ctype_digit((string)$int)) {
      return '';
    }

    return $pfix . str_pad(dechex((int)$int), 4, '0', STR_PAD_LEFT);
  }
2280
2281 1
  /**
   * Checks whether the "IntlChar" class can be used: this requires
   * PHP >= 7.0 (as reported by "Bootup::is_php()") and an existing "IntlChar" class.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2290
2291 2
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    // extension_loaded() already returns a boolean
    return extension_loaded('intl');
  }
2300
2301
  /**
   * Alias of "UTF8::is_ascii()": the argument is passed through unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return boolean <p>The result of UTF8::is_ascii().</p>
   */
  public static function isAscii($str)
  {
    return self::is_ascii($str);
  }
2314
2315
  /**
   * Alias of "UTF8::is_base64()": the argument is passed through unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of UTF8::is_base64().</p>
   */
  public static function isBase64($str)
  {
    return self::is_base64($str);
  }
2328
2329
  /**
   * Alias of "UTF8::is_binary()": the argument is passed through unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>The result of UTF8::is_binary().</p>
   */
  public static function isBinary($str)
  {
    return self::is_binary($str);
  }
2342
2343
  /**
   * Alias of "UTF8::is_bom()": the argument is passed through unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The input character.</p>
   *
   * @return boolean <p>The result of UTF8::is_bom().</p>
   */
  public static function isBom($utf8_chr)
  {
    return self::is_bom($utf8_chr);
  }
2356
2357
  /**
   * Alias of "UTF8::is_html()": the argument is passed through unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return boolean <p>The result of UTF8::is_html().</p>
   */
  public static function isHtml($str)
  {
    return self::is_html($str);
  }
2370
2371
  /**
   * Alias of "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>The string that is forwarded to UTF8::is_json().</p>
   *
   * @return bool <p>The unchanged result of UTF8::is_json().</p>
   */
  public static function isJson($str)
  {
    return self::is_json($str);
  }
2384
2385
  /**
   * Alias of "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>The string that is forwarded to UTF8::is_utf16().</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function isUtf16($str)
  {
    return self::is_utf16($str);
  }
2398
2399
  /**
   * Alias of "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>The string that is forwarded to UTF8::is_utf32().</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function isUtf32($str)
  {
    return self::is_utf32($str);
  }
2412
2413
  /**
   * Alias of "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>The string that is forwarded to UTF8::is_utf8().</p>
   * @param bool   $strict <p>Forwarded unchanged as the second argument of UTF8::is_utf8().</p>
   *
   * @return bool <p>The unchanged result of UTF8::is_utf8().</p>
   */
  public static function isUtf8($str, $strict = false)
  {
    return self::is_utf8($str, $strict);
  }
2427
2428
  /**
   * Checks whether a string consists of 7 bit ASCII bytes only.
   *
   * @param string $str <p>The string to check (it is cast to string first).</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if the string is empty or contains no byte in the range 0x80-0xFF<br />
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // an empty string cannot contain a non-ASCII byte
    if ($str === '') {
      return true;
    }

    // any byte with the high bit set means "not 7 bit ASCII"
    $highBitByteFound = preg_match('/[\x80-\xFF]/', $str);

    return !$highBitByteFound;
  }
2448
2449 1
  /**
   * Checks whether a string is base64 encoded.
   *
   * The string is strictly decoded and encoded again; it counts as base64
   * only if that round trip reproduces the input exactly.
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded; an empty string yields <strong>false</strong>.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $roundTrip = base64_encode(base64_decode($str, true));

    return $roundTrip === $str;
  }
2470
2471 16
  /**
   * Heuristic check whether the input looks like binary data.
   *
   * The input counts as binary if it consists of "0" and "1" characters only,
   * or if it contains a NUL byte, or if the ratio check at the end succeeds.
   *
   * @param mixed $input <p>The value to inspect; it is handed to string functions as-is.</p>
   *
   * @return bool <p><strong>true</strong> if one of the checks matches, <strong>false</strong> otherwise.</p>
   */
  public static function is_binary($input)
  {
    $length = strlen($input);

    // a non-empty sequence of "0" and "1" characters only
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // at least one NUL byte
    if (substr_count($input, "\x00") > 0) {
      return true;
    }

    // an empty input never passes the ratio check
    if (!$length) {
      return false;
    }

    // NOTE(review): substr_count() looks for the literal text '^ -~' here, not for a
    // character class; this was presumably meant to count non-printable bytes - confirm
    // before changing, other detection methods depend on the current result.
    return substr_count($input, '^ -~') / $length > 0.3;
  }
2495
2496
  /**
   * Checks whether a file looks like a binary file.
   *
   * Up to 512 bytes are read from the start of the file and handed to
   * UTF8::is_binary(). If the file cannot be opened or read, an empty string
   * is checked instead.
   *
   * @param string $file <p>The path that is passed to fopen().</p>
   *
   * @return bool <p>The result of UTF8::is_binary() for the bytes that were read.</p>
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'r');

      // fopen() signals failure by returning false, it does not throw on its own,
      // so the handle must be checked before it is used
      if ($fp !== false) {
        $data = fread($fp, 512);
        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // only reached if an error handler converts warnings into exceptions
      $block = '';
    }

    // always release the handle, also when reading failed
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2515
2516
  /**
   * Checks if the given string is identical to one of the known "Byte Order Marks".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you want to check for a BOM inside a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is strictly equal to a key of self::$bom, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // the BOM strings are the keys of self::$bom, the values are not needed here
    $bomStrings = array_keys(self::$bom);

    return in_array($str, $bomStrings, true);
  }
2535
2536 1
  /**
   * Checks whether the string contains at least one html-tag, e.g. <lall>.
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool <p><strong>true</strong> if a tag was found, <strong>false</strong> otherwise (also for an empty string).</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // opening, closing or self-closing tag with optional attributes
    $found = array();
    preg_match("/<\/?\w+((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>/", $str, $found);

    return count($found) != 0;
  }
2562
2563 1
  /**
   * Tries to check if "$str" is a json-string.
   *
   * The string counts as JSON when UTF8::json_decode() yields an object or an
   * array and json_last_error() reports no error. Input that decodes to a
   * scalar or to null is rejected.
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return bool <p><strong>true</strong> for a JSON object or JSON array, <strong>false</strong> otherwise.</p>
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // a top-level JSON array is decoded into a PHP array, so it has to be
    // accepted next to objects
    if (!is_object($decoded) && !is_array($decoded)) {
      return false;
    }

    return json_last_error() === JSON_ERROR_NONE;
  }
2588
2589
  /**
   * Checks if the string is UTF-16.
   *
   * After removing a BOM, only input that UTF8::is_binary() accepts is examined.
   * For each byte order the string is converted to UTF-8, back to UTF-16 and to
   * UTF-8 again; if both UTF-8 results are identical, the entries of
   * UTF8::count_chars() for the converted string are matched against those of
   * the input and every hit raises the score of that byte order.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16 (or both byte orders score equally),<br />
   *                   <strong>1</strong> for UTF-16LE,<br />
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    // score per byte order, little endian is evaluated first
    $score = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // round trip: UTF-8 -> UTF-16 -> UTF-8 has to reproduce the first conversion
      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($asUtf8Again !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($asUtf8Again, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // equal scores give no indication for either byte order
    if ($score['UTF-16BE'] === $score['UTF-16LE']) {
      return false;
    }

    return $score['UTF-16LE'] > $score['UTF-16BE'] ? 1 : 2;
  }
2648
2649
  /**
   * Checks if the string is UTF-32.
   *
   * After removing a BOM, only input that UTF8::is_binary() accepts is examined.
   * For each byte order the string is converted to UTF-8, back to UTF-32 and to
   * UTF-8 again; if both UTF-8 results are identical, the entries of
   * UTF8::count_chars() for the converted string are matched against those of
   * the input and every hit raises the score of that byte order.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32 (or both byte orders score equally),<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (!self::is_binary($str)) {
      return false;
    }

    // score per byte order, little endian is evaluated first
    $score = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $encoding) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
      if (!$asUtf8) {
        continue;
      }

      // round trip: UTF-8 -> UTF-32 -> UTF-8 has to reproduce the first conversion
      $backAgain = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
      $asUtf8Again = \mb_convert_encoding($backAgain, 'UTF-8', $encoding);
      if ($asUtf8Again !== $asUtf8) {
        continue;
      }

      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($asUtf8Again, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $score[$encoding]++;
        }
      }
    }

    // equal scores give no indication for either byte order
    if ($score['UTF-32BE'] === $score['UTF-32LE']) {
      return false;
    }

    return $score['UTF-32LE'] > $score['UTF-32BE'] ? 1 : 2;
  }
2708
2709
  /**
   * Checks whether the passed string contains only byte sequences that are valid UTF-8 characters.
   *
   * If UTF8::pcre_utf8_support() reports true, the check is done by PCRE via the
   * "/u" modifier; otherwise the string is validated byte by byte.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked (it is cast to string first).</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p><strong>true</strong> for an empty string or valid UTF-8, <strong>false</strong> otherwise.</p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() === true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences.
      //
      // This shortcut needs the "/u" modifier, so it must only be taken
      // when the modifier is supported.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    // Fallback without PCRE: validate the octets one by one.

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence
    $len = strlen($str);

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet
      // sequence.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      /**
       * End of the multi-octet sequence. mUcs4 now contains the final
       * Unicode code point to be output
       */
      if (0 === --$mState) {
        /*
         * Check for illegal sequences and code points.
         */
        // From Unicode 3.1, non-shortest form is illegal
        if (
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // initialize UTF8 cache
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A sequence that is still open at the end of the string is truncated
    // and therefore not valid UTF-8.
    return $mState === 0;
  }
2853
2854
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Decodes a JSON string, after it was passed through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-decode.php
   *
   * @param string $json    <p>
   *                        The <i>json</i> string being decoded.
   *                        </p>
   *                        <p>
   *                        This function only works with UTF-8 encoded strings.
   *                        </p>
   *                        <p>PHP implements a superset of
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                        only supports these values when they are nested inside an array or an object.
   *                        </p>
   * @param bool   $assoc   [optional] <p>
   *                        When <b>TRUE</b>, returned objects will be converted into
   *                        associative arrays.
   *                        </p>
   * @param int    $depth   [optional] <p>
   *                        User specified recursion depth.
   *                        </p>
   * @param int    $options [optional] <p>
   *                        Bitmask of JSON decode options. It is only passed on
   *                        when Bootup::is_php('5.4') reports <b>TRUE</b>.
   *                        </p>
   *
   * @return mixed the value encoded in <i>json</i> in appropriate
   * PHP type. Values true, false and
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
   * <i>json</i> cannot be decoded or if the encoded
   * data is deeper than the recursion limit.
   */
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    $json = self::filter($json);

    // without the PHP 5.4 check passing, the "$options" argument is left out
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
2902
2903
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value, after it was passed through UTF8::filter().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except
   *                       a resource.
   *                       </p>
   *                       <p>
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   *                       <p>PHP implements a superset of
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
   *                       only supports these values when they are nested inside an array or an object.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
   *                       <b>JSON_HEX_TAG</b>,
   *                       <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>,
   *                       <b>JSON_NUMERIC_CHECK</b>,
   *                       <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
   *                       <b>JSON_FORCE_OBJECT</b>,
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
   *                       constants is described on
   *                       the JSON constants page.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero. It is only
   *                       passed on when Bootup::is_php('5.5') reports a truthy value.
   *                       </p>
   *
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $value = self::filter($value);

    // without the PHP 5.5 check passing, the "$depth" argument is left out
    if (!Bootup::is_php('5.5')) {
      return json_encode($value, $options);
    }

    return json_encode($value, $options, $depth);
  }
2951
2952 6
  /**
   * Makes string's first char lowercase.
   *
   * @param string $str <p>The input string (it is cast to string first).</p>
   *
   * @return string <p>The resulting string; an empty input yields an empty string.</p>
   */
  public static function lcfirst($str)
  {
    $str = (string)$str;

    // nothing to convert, and the helpers below are not asked to handle empty input
    if (!isset($str[0])) {
      return '';
    }

    // UTF8::substr() can also return false, so both parts are cast to string
    // before they are used
    $firstChar = (string)self::substr($str, 0, 1);
    $rest = (string)self::substr($str, 1);

    return self::strtolower($firstChar) . $rest;
  }
2963
2964
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * Without "$chars" (or with an empty value) everything matching the Unicode
   * categories "Z" (separators) and "C" (other / control) is stripped.
   *
   * @param string $str   <p>The string to be trimmed (it is cast to string first).</p>
   * @param string $chars <p>Optional characters to be stripped, "0" is a valid character list.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but is a legitimate character list,
    // so it must not fall back to the default pattern.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    $chars = self::rxClass($chars);

    return preg_replace("/^{$chars}+/u", '', $str);
  }
2989
2990 1
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (an array is joined first).</p>
   *
   * @return string <p>The character with the highest code point.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $highestCodePoint = max(self::codepoints($arg));

    return self::chr($highestCodePoint);
  }
3005
3006
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>The largest value of UTF8::chr_size_list(), or 0 if that list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    if (count($sizes) <= 0) {
      return 0;
    }

    return (int)max($sizes);
  }
3023
3024 2
  /**
   * Checks whether mbstring is available on the server.
   *
   * If the extension is loaded, the internal mbstring encoding is set to "UTF-8" as a side effect.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3039
3040 1
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (an array is joined first).</p>
   *
   * @return string <p>The character with the lowest code point.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg)) {
      $arg = implode('', $arg);
    }

    $lowestCodePoint = min(self::codepoints($arg));

    return self::chr($lowestCodePoint);
  }
3055
3056
  /**
   * Alias of "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name that is forwarded to UTF8::normalize_encoding().</p>
   *
   * @return string|false <p>The unchanged result of UTF8::normalize_encoding(), which is <strong>false</strong> for an empty encoding name.</p>
   */
  public static function normalizeEncoding($encoding)
  {
    return self::normalize_encoding($encoding);
  }
3069
3070 16
  /**
   * Normalize the encoding-"name" input.
   *
   * The name is upper-cased, reduced to its letters and digits and looked up in a table of
   * well-known aliases. Names without an alias are returned upper-cased. The result is
   * memoized per input name in a static cache.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   *
   * @return string|false <p>
   *                      e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.,<br />
   *                      <strong>false</strong> if the given encoding name is empty.
   *                      </p>
   */
  public static function normalize_encoding($encoding)
  {
    static $staticNormalizeEncodingCache = array();

    if (!$encoding) {
      return false;
    }

    // fast path: the canonical UTF-8 name needs no work
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    // names already present in the iconv list are returned unchanged
    if (in_array($encoding, self::$iconvEncoding, true)) {
      return $encoding;
    }

    if (isset($staticNormalizeEncodingCache[$encoding])) {
      return $staticNormalizeEncodingCache[$encoding];
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // Build the lookup key from letters and digits only, so that every separator style
    // ("utf-8", "utf_8", "utf 8") resolves to the same alias.
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (isset($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    $staticNormalizeEncodingCache[$encodingOrig] = $encoding;

    return $encoding;
  }
3126
3127 13
  /**
   * Replace MS Word special characters by the values mapped in the class table "$utf8MSWord".
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string <p>The input with every key of the table replaced by its value.</p>
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are derived from the table once and then reused
    static $searchChars = null;
    static $replaceChars = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($searchChars === null) {
      $searchChars = array_keys(self::$utf8MSWord);
      $replaceChars = array_values(self::$utf8MSWord);
    }

    return str_replace($searchChars, $replaceChars, $str);
  }
3153 18
3154
  /**
   * Normalize the whitespace.
   *
   * Every character of the class whitespace table is replaced by a plain space and, unless
   * requested otherwise, the characters of the bidi-control table are removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key has to follow the same strict test that decides below whether
    // NO-BREAK SPACE is dropped from the table. Otherwise a truthy non-boolean (e.g. 1)
    // would store the full table in the "keep" slot and break later calls passing true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $WHITESPACE_CACHE[$cacheKey] = self::$whitespaceTable;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($WHITESPACE_CACHE[$cacheKey]['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($WHITESPACE_CACHE[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$bidiUniCodeControlsTable);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3200
3201
  /**
   * Format a number with grouped thousands.
   *
   * Separators longer than one byte are applied in a second step on top of a plain
   * "." / "," formatted number, so the result does not depend on how the native
   * number_format() of the running PHP version treats multi-byte separators.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;

    // One multi-byte separator is enough to need the two-step approach.
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array replaces both placeholders in a single pass; a sequential
      // str_replace() would re-replace "," characters that were just inserted as part
      // of the decimal point.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3238
3239
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8; an empty value is treated as UTF-8.</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // "0" is falsy in PHP but still a valid character
    if (!$chr && $chr !== '0') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);

      // normalize_encoding() yields false for an empty name; that value must not reach
      // mb_convert_encoding(). A name that normalizes to UTF-8 needs no conversion either.
      if ($encoding && $encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['intlChar'] === true) {
      $tmpReturn = \IntlChar::ord($chr);
      if ($tmpReturn) {
        return $tmpReturn;
      }
    }

    // static cache for the manual decoding below
    static $cache = array();
    if (isset($cache[$chr]) === true) {
      return $cache[$chr];
    }

    $chr_orig = $chr;
    // look at no more than the first four bytes (1-based array of byte values)
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $chr = unpack('C*', substr($chr, 0, 4));
    $code = $chr ? $chr[1] : 0;

    // four-byte sequence
    if (0xF0 <= $code && isset($chr[4])) {
      return $cache[$chr_orig] = (($code - 0xF0) << 18) + (($chr[2] - 0x80) << 12) + (($chr[3] - 0x80) << 6) + $chr[4] - 0x80;
    }

    // three-byte sequence
    if (0xE0 <= $code && isset($chr[3])) {
      return $cache[$chr_orig] = (($code - 0xE0) << 12) + (($chr[2] - 0x80) << 6) + $chr[3] - 0x80;
    }

    // two-byte sequence
    if (0xC0 <= $code && isset($chr[2])) {
      return $cache[$chr_orig] = (($code - 0xC0) << 6) + $chr[2] - 0x80;
    }

    // single byte (or nothing decodable): the first byte value itself
    return $cache[$chr_orig] = $code;
  }
3299
3300
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method does not (re-)place variables in the current scope,
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string $str    <p>The input string.</p>
   * @param array  $result <p>The result will be returned into this reference parameter.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str($str, &$result)
  {
    // run the input through UTF8::clean() before handing it to mbstring
    $cleaned = self::clean($str);

    $parsed = \mb_parse_str($cleaned, $result);

    return $parsed !== false && !empty($result);
  }
3325 1
3326
  /**
   * Checks if the "/u" pattern modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern matches the empty subject (1) when "/u" is accepted;
    // the warning of an unsupported modifier is silenced on purpose
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return $matched ? true : false;
  }
3336 1
3337 1
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: a decimal digit string is taken as number,
   * a hexadecimal digit string goes through UTF8::hex_to_int(), everything else through UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty / resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start first, then end; stop at the first boundary without a usable code point
    $codePoints = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($codePoints[0], $codePoints[1]));
  }
3383 10
3384
  /**
   * Alias of "UTF8::remove_bom()" (camelCase spelling).
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The value returned by UTF8::remove_bom().</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3397
3398 1
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of the class BOM table (BOM bytes => byte length) is checked in turn
   * against the start of the string and cut off when it matches.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    foreach (self::$bom as $bomString => $bomByteLength) {
      if (0 === strpos($str, $bomString)) {
        // before PHP 8 substr() gives false (not '') when nothing is left behind the BOM,
        // the cast keeps the documented string return type for BOM-only input
        $str = (string)substr($str, $bomByteLength);
      }
    }

    return $str;
  }
3415
3416
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of one or more consecutive repetitions of a search string is collapsed
   * into a single occurrence. Values of $what that are neither string nor array are ignored.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what)) {
      $what = array($what);
    }

    if (is_array($what)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // The captured group always holds exactly one occurrence of $item, so "$1" puts it
        // back literally. Passing $item itself as replacement would let preg_replace()
        // interpret backslashes inside of it.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3439 45
3440 45
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded <p>Also remove the URL-encoded ("%xx") form of the characters.</p>
   * @param string $replacement <p>The string that is put in place of removed characters.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of percent-encoding may be written in either case ("%0b" and "%0B")
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until a pass removes nothing, a removal can join the halves of a new match
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
3473
3474
  /**
   * Replace the diamond question mark (�) with the replacement.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>The replacement for every diamond question mark.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $unknown = '?')
  {
    // both spellings of the character: escaped bytes and literal
    $diamonds = array(
        "\xEF\xBF\xBD",
        '�',
    );

    $replacements = array(
        $unknown,
        $unknown,
    );

    return str_replace($diamonds, $replacements, $str);
  }
3496
3497
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * Without a character list, trailing Unicode separators (\pZ) and "other" characters
   * (\pC, which includes control characters) are removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    // "0" is falsy in PHP but a legitimate character to strip, so it is not "no list".
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    $chars = self::rxClass($chars);

    // "D": let "$" match at the very end only; by default it also matches in front of a
    // final newline, which would strip characters standing before that newline.
    return preg_replace("/{$chars}+$/uD", '', $str);
  }
3522 2
3523
  /**
   * Build a regex fragment that matches any one of the pieces of $s.
   *
   * The pieces come from UTF8::str_split(). Short pieces are collected in one bracket class,
   * the remaining ones become alternatives of a non-capturing group. Results are cached
   * per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class <p>Initial content of the bracket class.</p>
   *
   * @return string <p>A bracket class, or "(?:...|...)" if there is more than one part.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $rxClassCache = array();

    $cacheKey = $s . $class;

    if (isset($rxClassCache[$cacheKey])) {
      return $rxClassCache[$cacheKey];
    }

    // $parts[0] collects the bracket-class members, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $piece) {
      if ($piece === '-') {
        // a hyphen goes to the front of the class, where it cannot form a range
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($piece[2])) {
        // at most two bytes: quoted for use inside the pattern
        $parts[0] .= preg_quote($piece, '/');
      } elseif (self::strlen($piece) === 1) {
        $parts[0] .= $piece;
      } else {
        $parts[] = $piece;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = count($parts) === 1 ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $rxClassCache[$cacheKey] = $pattern;

    return $pattern;
  }
3571 1
3572
  /**
   * WARNING: Echoes every entry of the native UTF8-Support list, e.g. for debugging.
   */
  public static function showSupport()
  {
    foreach (self::$support as $supportEntry) {
      // one entry per line, followed by a html line break
      echo $supportEntry, "\n<br>";
    }
  }
3581
3582
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false)
  {
    // "0" is falsy in PHP but still a character that has to be encoded (same test as in ord())
    if (!$char && $char !== '0') {
      return '';
    }

    if (
        $keepAsciiChars === true
        &&
        self::isAscii($char) === true
    ) {
      return $char;
    }

    return '&#' . self::ord($char) . ';';
  }
3606 6
3607
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split by a regular expression, otherwise it is
   * walked byte by byte and only well-formed 1-4 byte sequences are collected.
   *
   * @param string $str       <p>The string to split into array.</p>
   * @param int    $length    [optional] <p>Max character length of each array element.</p>
   * @param bool   $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $chars = array();

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$support['pcre_utf8'] === true) {

      if ($cleanUtf8 === true) {
        $str = self::clean($str);
      }

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: classify each lead byte by its high bits and verify the continuation bytes
      $byteCount = strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {
          // single byte (ASCII)
          $chars[] = $lead;
        } elseif ((($lead & "\xE0") === "\xC0") && isset($str[$pos + 1])) {
          // two-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }
        } elseif ((($lead & "\xF0") === "\xE0") && isset($str[$pos + 2])) {
          // three-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }
        } elseif ((($lead & "\xF8") === "\xF0") && isset($str[$pos + 3])) {
          // four-byte sequence
          if ((($str[$pos + 1] & "\xC0") === "\x80") && (($str[$pos + 2] & "\xC0") === "\x80") && (($str[$pos + 3] & "\xC0") === "\x80")) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // group the characters and glue each group back together
      $chars = array_map('implode', array_chunk($chars, $length));
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
3689
3690
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run from cheap to expensive: UTF-16 / UTF-32 (binary input only), ASCII,
   * UTF-8, "\mb_detect_encoding()" with a fixed candidate list and finally an iconv()
   * round-trip for every encoding of the class iconv list.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str)) {
      // each detector is asked once and its result is reused for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    foreach (self::$iconvEncoding as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      // The string is valid in an encoding if converting it to that same encoding gives it
      // back unchanged. iconv() gives false on failure, which never equals the non-empty
      // input that reaches this point.
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (@iconv($encodingTmp, $encodingTmp, $str) === $str) {
        return $encodingTmp;
      }
    }

    return false;
  }
3782
3783 14
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if haystack or needle is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the last characters of the haystack, as many as the needle has
    $tail = self::substr($haystack, -self::strlen($needle));

    return $needle === $tail;
  }
3806 1
3807
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well if haystack or needle is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the cast guards against a non-string result of UTF8::substr() before the comparison
    $tail = (string)self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
3830
3831
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); they are inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // one case-insensitive unicode pattern per needle
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // an empty needle must not match anything, this pattern can never succeed
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() reads "$n" and "\n" in a replacement as back-references;
    // escaping "\" and "$" makes the replacement literal, like str_ireplace() does
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacement = addcslashes((string)$replace, '\\$');
    }

    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
3874 2
3875 2
  /**
3876 2
   * Check if the string starts with the given substring, case insensitive.
3877 2
   *
3878 2
   * @param string $haystack <p>The string to search in.</p>
3879
   * @param string $needle   <p>The substring to search for.</p>
3880 2
   *
3881 2
   * @return bool
3882 2
   */
3883 2 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $text = (string)$haystack;
    $prefix = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if ($text === '' || $prefix === '') {
      return false;
    }

    // The needle is a prefix only if its first case-insensitive hit is at position 0.
    return self::stripos($text, $prefix) === 0;
  }
3898
3899
  /**
3900
   * Limit the number of characters in a string without cutting in the middle of a word (if possible).
3901
   *
3902
   * @param string $str
3903
   * @param int    $length
3904
   * @param string $strAddOn
3905
   *
3906
   * @return string
3907
   */
3908
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $text = (string)$str;
    if ($text === '') {
      return '';
    }

    $limit = (int)$length;

    // Short enough already: hand the input back untouched.
    if (self::strlen($text) <= $limit) {
      return $text;
    }

    // The limit ends on a space: drop that space and append the add-on.
    if (self::substr($text, $limit - 1, 1) === ' ') {
      return self::substr($text, 0, $limit - 1) . $strAddOn;
    }

    // Hard-cut at the limit, then throw away the last (possibly partial) word.
    $cut = self::substr($text, 0, $limit);
    $words = explode(' ', $cut);
    array_pop($words);
    $kept = implode(' ', $words);

    // Nothing is left after dropping the last word: fall back to a plain character cut.
    if ($kept === '') {
      return self::substr($cut, 0, $limit - 1) . $strAddOn;
    }

    return $kept . $strAddOn;
  }
3939
3940
  /**
3941
   * Pad a UTF-8 string to given length with another string.
3942
   *
3943
   * @param string $str        <p>The input string.</p>
3944
   * @param int    $pad_length <p>The length of return string.</p>
3945
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
3946
   * @param int    $pad_type   [optional] <p>
3947 12
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
3948
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
3949 12
   *                           </p>
3950
   *
3951
   * @return string <strong>Returns the padded string</strong>
3952
   */
3953
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    // Padding only happens for a positive integer target length that is not
    // shorter than the input; everything else returns the input unchanged.
    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length === 0) {
      // An empty pad string cannot fill anything and would cause a division by zero below.
      return $str;
    }

    // Number of characters that still have to be added.
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        // Repeat the pad string often enough, then trim it to the exact character count.
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the smaller half, the right side the remainder.
        // floor() is applied to the quotient; "(int)$diff / 2" would yield a float length.
        $left = (int)floor($diff / 2);
        $right = $diff - $left;

        $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
        $pre = self::substr($pre, 0, $left);
        $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
        $post = self::substr($post, 0, $right);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
3994
3995
  /**
3996
   * Repeat a string.
3997
   *
3998
   * @param string $str        <p>
3999
   *                           The string to be repeated.
4000
   *                           </p>
4001
   * @param int    $multiplier <p>
4002 21
   *                           Number of times the input string should be
4003
   *                           repeated.
4004
   *                           </p>
4005 21
   *                           <p>
4006 21
   *                           multiplier has to be greater than or equal to 0.
4007
   *                           If the multiplier is set to 0, the function
4008 21
   *                           will return an empty string.
4009 1
   *                           </p>
4010
   *
4011
   * @return string <p>The repeated string.</p>
4012 20
   */
4013
  public static function str_repeat($str, $multiplier)
  {
    // Run the input through self::filter() first, then let the native function repeat it.
    return str_repeat(self::filter($str), $multiplier);
  }
4019 20
4020 20
  /**
4021
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
4022
   *
4023 1
   * Replace all occurrences of the search string with the replacement string
4024 1
   *
4025
   * @link http://php.net/manual/en/function.str-replace.php
4026
   *
4027 1
   * @param mixed $search  <p>
4028 1
   *                       The value being searched for, otherwise known as the needle.
4029 1
   *                       An array may be used to designate multiple needles.
4030 1
   *                       </p>
4031 1
   * @param mixed $replace <p>
4032
   *                       The replacement value that replaces found search
4033 1
   *                       values. An array may be used to designate multiple replacements.
4034
   *                       </p>
4035 1
   * @param mixed $subject <p>
4036
   *                       The string or array being searched and replaced on,
4037
   *                       otherwise known as the haystack.
4038
   *                       </p>
4039
   *                       <p>
4040
   *                       If subject is an array, then the search and
4041
   *                       replace is performed with every entry of
4042
   *                       subject, and the return value is an array as
4043
   *                       well.
4044
   *                       </p>
4045 1
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4046
   *
4047 1
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4048
   */
4049 1
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // Pure pass-through to the native function; $count is filled in by reference.
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4053
4054
  /**
4055
   * Shuffles all the characters in the string.
4056
   *
4057
   * @param string $str <p>The input string</p>
4058
   *
4059
   * @return string <p>The shuffled string.</p>
4060
   */
4061
  public static function str_shuffle($str)
  {
    // Break the string apart with self::split(), shuffle the pieces and glue them back together.
    $pieces = self::split($str);
    shuffle($pieces);

    return implode('', $pieces);
  }
4069
4070
  /**
4071
   * Sort all characters according to code points.
4072
   *
4073
   * @param string $str    <p>A UTF-8 string.</p>
4074
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4075
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4076
   *
4077
   * @return string <p>String of sorted characters.</p>
4078
   */
4079
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice collapses duplicate values, because array keys are unique.
      $codePoints = array_flip(array_flip($codePoints));
    }

    // Order by value; the direction depends on $desc.
    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    // Convert the sorted code points back into a string.
    return self::string($codePoints);
  }
4095 1
4096
  /**
4097 1
   * Split a string into an array.
4098
   *
4099 1
   * @param string $str
4100 1
   * @param int    $len
4101 1
   *
4102 1
   * @return array
4103 1
   */
4104 1
  public static function str_split($str, $len = 1)
  {
    $chunkSize = (int)$len;
    $input = (string)$str;

    if ($input === '') {
      return array();
    }

    // Invalid chunk sizes are delegated to the native function, which reports the error its usual way.
    if ($chunkSize < 1) {
      return str_split($input, $chunkSize);
    }

    // Collect every match of the grapheme cluster pattern.
    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . Grapheme::GRAPHEME_CLUSTER_RX . '/u', $input, $matches);
    $graphemes = $matches[0];

    if ($chunkSize === 1) {
      return $graphemes;
    }

    // Join every run of $chunkSize consecutive matches into one output element.
    $chunks = array();
    foreach (array_chunk($graphemes, $chunkSize) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4140
4141
  /**
4142
   * Check if the string starts with the given substring.
4143
   *
4144
   * @param string $haystack <p>The string to search in.</p>
4145
   * @param string $needle   <p>The substring to search for.</p>
4146
   *
4147 1
   * @return bool
4148
   */
4149 1 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $text = (string)$haystack;
    $prefix = (string)$needle;

    // An empty haystack or an empty needle never counts as a match.
    if ($text === '' || $prefix === '') {
      return false;
    }

    // The needle is a prefix only if its first occurrence is at position 0.
    return self::strpos($text, $prefix) === 0;
  }
4164 12
4165
  /**
4166 12
   * Get a binary representation of a specific string.
4167 11
   *
4168 11
   * @param string $str <p>The input string.</p>
4169 12
   *
4170
   * @return string
4171
   */
4172
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      // Historical result for an empty input.
      return '0';
    }

    // Convert byte by byte. base_convert() may lose precision on large numbers,
    // so converting the whole hex dump at once breaks for longer inputs.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // Keep the established output format: no leading zeros, "0" for an all-zero input.
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4180
4181
  /**
4182 9
   * alias for "UTF8::to_ascii()"
4183
   *
4184 9
   * @see UTF8::to_ascii()
4185 1
   *
4186
   * @param string $str
4187
   * @param string $unknown
4188 8
   * @param bool   $strict
4189 2
   *
4190 2
   * @return string
4191
   */
4192 8
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // Plain alias: every argument is forwarded unchanged to self::to_ascii().
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4196
4197 7
  /**
4198
   * Counts number of words in the UTF-8 string.
4199 7
   *
4200
   * @param string $str      <p>The input string.</p>
4201
   * @param int    $format   [optional] <p>
4202 1
   *                         <strong>0</strong> => return a number of words (default)<br />
4203
   *                         <strong>1</strong> => return an array of words<br />
4204
   *                         <strong>2</strong> => return an array of words with word-offset as key
4205
   *                         </p>
4206
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
4207
   *
4208
   * @return array|int <p>The number of words in the string</p>
4209
   */
4210
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $charlist = self::rxClass($charlist, '\pL');
    $strParts = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    $partCount = count($strParts);

    // With PREG_SPLIT_DELIM_CAPTURE the captured words sit at the odd indexes,
    // the text between them at the even ones.
    if ($format !== 1 && $format !== 2) {
      // Default: only the number of words is wanted.
      return ($partCount - 1) / 2;
    }

    $words = array();

    if ($format === 1) {
      // Plain list of words.
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $strParts[$i];
      }

      return $words;
    }

    // $format === 2: key every word by its character offset inside the input.
    $offset = self::strlen($strParts[0]);
    for ($i = 1; $i < $partCount; $i += 2) {
      $words[$offset] = $strParts[$i];
      $offset += self::strlen($strParts[$i]) + self::strlen($strParts[$i + 1]);
    }

    return $words;
  }
4241 2
4242 2
  /**
4243
   * Case-insensitive string comparison.
4244
   *
4245
   * INFO: Case-insensitive version of UTF8::strcmp()
4246
   *
4247
   * @param string $str1
4248
   * @param string $str2
4249
   *
4250
   * @return int <p>
4251
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4252 3
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4253
   *             <strong>0</strong> if they are equal.
4254 3
   *             </p>
4255 3
   */
4256 3
  public static function strcasecmp($str1, $str2)
  {
    // Fold both operands with self::strtocasefold() first, then compare them case-sensitively.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4260 3
4261
  /**
4262
   * alias for "UTF8::strstr()"
4263
   *
4264
   * @see UTF8::strstr()
4265
   *
4266
   * @param string  $haystack
4267
   * @param string  $needle
4268
   * @param bool    $before_needle
4269
   * @param string  $encoding
4270
   * @param boolean $cleanUtf8
4271
   *
4272
   * @return string|false
4273
   */
4274
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Plain alias: every argument is forwarded unchanged to self::strstr().
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4278
4279
  /**
4280
   * Case-sensitive string comparison.
4281
   *
4282 2
   * @param string $str1
4283
   * @param string $str2
4284
   *
4285 2
   * @return int  <p>
4286
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
4287 2
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
4288
   *              <strong>0</strong> if they are equal.
4289
   *              </p>
4290
   */
4291
  public static function strcmp($str1, $str2)
  {
    // Byte-identical strings are equal without any normalization work.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $left = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $right = \Normalizer::normalize($str2, \Normalizer::NFD);

    // Compare the NFD-normalized forms byte-wise.
    return strcmp($left, $right);
  }
4299
4300
  /**
4301
   * Find length of initial segment not matching mask.
4302
   *
4303
   * @param string $str
4304
   * @param string $charList
4305
   * @param int    $offset
4306
   * @param int    $length
4307
   *
4308
   * @return int|null
4309
   */
4310
  public static function strcspn($str, $charList, $offset = 0, $length = 2147483647)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // Narrow the subject to the requested window when the caller deviates from the defaults.
    if ($offset || $length !== 2147483647) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first character that belongs to the mask.
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // No character of the mask occurs: the whole string is the segment.
    return self::strlen($str);
  }
4332 7
4333 7
  /**
4334 7
   * alias for "UTF8::stristr()"
4335
   *
4336
   * @see UTF8::stristr()
4337
   *
4338 7
   * @param string  $haystack
4339
   * @param string  $needle
4340
   * @param bool    $before_needle
4341
   * @param string  $encoding
4342
   * @param boolean $cleanUtf8
4343
   *
4344
   * @return string|false
4345
   */
4346
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Plain alias: every argument is forwarded unchanged to self::stristr().
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4350
4351
  /**
4352
   * Create a UTF-8 string from code points.
4353
   *
4354
   * INFO: opposite to UTF8::codepoints()
4355 8
   *
4356
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
4357 8
   *
4358 2
   * @return string <p>UTF-8 encoded string.</p>
4359
   */
4360
  public static function string(array $array)
  {
    // Turn every code point into its character via self::chr() and concatenate the results.
    $result = '';
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
4372 6
4373
  /**
4374
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
4375
   *
4376
   * @param string $str <p>The input string.</p>
4377
   *
4378
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
4379
   */
4380
  public static function string_has_bom($str)
  {
    // The candidate BOM byte sequences are the keys of self::$bom; the values are not needed here.
    foreach (array_keys(self::$bom) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
4390
4391 62
  /**
4392 4
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
4393
   *
4394
   * @link http://php.net/manual/en/function.strip-tags.php
4395
   *
4396
   * @param string $str            <p>
4397 61
   *                               The input string.
4398 2
   *                               </p>
4399 61
   * @param string $allowable_tags [optional] <p>
4400 60
   *                               You can use the optional second parameter to specify tags which should
4401 60
   *                               not be stripped.
4402 2
   *                               </p>
4403
   *                               <p>
4404
   *                               HTML comments and PHP tags are also stripped. This is hardcoded and
4405
   *                               can not be changed with allowable_tags.
4406 61
   *                               </p>
4407 61
   *
4408 1
   * @return string <p>The stripped string.</p>
4409
   */
4410
  public static function strip_tags($str, $allowable_tags = null)
  {
    // Clean broken UTF-8 with self::clean() before handing the string to the native strip_tags().
    return strip_tags(self::clean($str), $allowable_tags);
  }
4417
4418
  /**
4419
   * Finds position of first occurrence of a string within another, case insensitive.
4420
   *
4421
   * @link http://php.net/manual/en/function.mb-stripos.php
4422
   *
4423
   * @param string  $haystack  <p>
4424
   *                           The string from which to get the position of the first occurrence
4425
   *                           of needle
4426
   *                           </p>
4427
   * @param string  $needle    <p>
4428
   *                           The string to find in haystack
4429
   *                           </p>
4430 1
   * @param int     $offset    [optional] <p>
4431
   *                           The position in haystack
4432 1
   *                           to start searching
4433
   *                           </p>
4434
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4435
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4436
   *
4437
   * @return int|false <p>
4438
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
4439
   *                   or false if needle is not found.
4440
   *                   </p>
4441
   */
4442
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Nothing can be found in, or with, an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    // mb_stripos() expects an integer offset. The null default must become 0,
    // otherwise PHP >= 8.1 reports a "passing null to parameter" deprecation.
    return \mb_stripos($haystack, $needle, (int)$offset, $encoding);
  }
4470
4471
  /**
4472
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
4473
   *
4474
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
4475
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
4476
   * @param bool    $before_needle [optional] <p>
4477
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
4478
   *                               haystack before the first occurrence of the needle (excluding the needle).
4479
   *                               </p>
4480
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
4481
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
4482
   *
4483
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
4484
   */
4485 2
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // An empty needle can never be found.
    $needle = (string)$needle;
    if ($needle === '') {
      return false;
    }

    // Only non-default encodings need to be normalized.
    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $found = \mb_stristr($haystack, $needle, $before_needle, $encoding);

    return $found;
  }
4504
4505 1
  /**
4506 1
   * Get the string length, not the byte-length!
4507
   *
4508 1
   * @link     http://php.net/manual/en/function.mb-strlen.php
4509 1
   *
4510
   * @param string  $str       <p>The string being checked for length.</p>
4511
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
4512 1
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
4513 1
   *
4514
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
4515 1
   *             character counted as +1)</p>
4516
   */
4517
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $useUtf8 = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $useUtf8 ? 'UTF-8' : self::normalize_encoding($encoding);

    // For these two encodings the native byte length is returned directly.
    // The comparison is intentionally loose, exactly as a switch statement compares.
    if ($encoding == 'ASCII' || $encoding == 'CP850') {
      return strlen($str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return \mb_strlen($str, $encoding);
  }
4547
4548
  /**
4549 14
   * Case insensitive string comparisons using a "natural order" algorithm.
4550
   *
4551
   * INFO: natural order version of UTF8::strcasecmp()
4552
   *
4553 14
   * @param string $str1 <p>The first string.</p>
4554
   * @param string $str2 <p>The second string.</p>
4555
   *
4556 2
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
4557 2
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
4558 2
   *             <strong>0</strong> if they are equal
4559
   */
4560 14
  public static function strnatcasecmp($str1, $str2)
  {
    // Fold both operands with self::strtocasefold() first, then compare them in natural order.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
4564
4565
  /**
4566 14
   * String comparisons using a "natural order" algorithm
4567 2
   *
4568 14
   * INFO: natural order version of UTF8::strcmp()
4569 14
   *
4570 14
   * @link  http://php.net/manual/en/function.strnatcmp.php
4571 1
   *
4572
   * @param string $str1 <p>The first string.</p>
4573
   * @param string $str2 <p>The second string.</p>
4574 14
   *
4575 14
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
4576
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
4577
   *             <strong>0</strong> if they are equal
4578
   */
4579
  public static function strnatcmp($str1, $str2)
  {
    // Byte-identical strings are equal without any further processing.
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // Otherwise compare the results of self::strtonatfold() with the native natural-order comparison.
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
4583
4584
  /**
4585
   * Case-insensitive string comparison of the first n characters.
4586
   *
4587
   * @link  http://php.net/manual/en/function.strncasecmp.php
4588
   *
4589
   * @param string $str1 <p>The first string.</p>
4590
   * @param string $str2 <p>The second string.</p>
4591
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
4592
   *
4593
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4594
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4595
   *             <strong>0</strong> if they are equal
4596
   */
4597
  public static function strncasecmp($str1, $str2, $len)
  {
    // Fold both operands with self::strtocasefold() first, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
4601
4602
  /**
4603
   * String comparison of the first n characters.
4604
   *
4605
   * @link  http://php.net/manual/en/function.strncmp.php
4606
   *
4607
   * @param string $str1 <p>The first string.</p>
4608
   * @param string $str2 <p>The second string.</p>
4609
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
4610
   *
4611
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
4612
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
4613
   *             <strong>0</strong> if they are equal
4614
   */
4615
  public static function strncmp($str1, $str2, $len)
  {
    // Compare only the leading $len characters of each operand.
    return self::strcmp(
        self::substr($str1, 0, $len),
        self::substr($str2, 0, $len)
    );
  }
4622 1
4623 1
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      The part of haystack starting at the first character that is contained in char_list,<br />
   *                      or false if haystack or char_list is empty or no character is found.
   *                      </p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if (!isset($haystack[0], $char_list[0])) {
      return false;
    }

    // PREG_OFFSET_CAPTURE yields the byte offset of the match, so the haystack
    // does not need to be scanned a second time to locate the matched character
    if (preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m, PREG_OFFSET_CAPTURE)) {
      return substr($haystack, $m[0][1]);
    }

    return false;
  }
4648 2
4649
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string being checked.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a non-negative
    // integer is converted into its character first; this must happen before
    // the string cast below, otherwise the integer check can never be true
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (self::$support['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$support['iconv'] === true) {
      // a negative offset is ignored here to keep the behaviour stable
      // across the PHP releases that changed how iconv_strpos() treats it
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // fallback: byte-wise search, then translate the byte offset into a character offset

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);

      // self::substr() returns false when the offset lies beyond the end of the string
      if ($haystack === false) {
        return false;
      }
    }

    if (($pos = strpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // a negative offset is ignored in this fallback
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4727
4728
  /**
   * Return a portion of haystack relative to the last occurrence of needle.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string to search in.</p>
   * @param string $needle        <p>The string to search for.</p>
   * @param bool   $before_needle [optional] <p>
   *                              <b>true</b>: return everything from the beginning of haystack
   *                              up to the last occurrence of needle.<br />
   *                              <b>false</b> (default): return everything from the last occurrence
   *                              of needle to the end of haystack.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use, default is UTF-8.</p>
   * @param bool   $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default charset name needs to be normalized
    $charset = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // the search is done on cleaned copies of both inputs
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
4766 10
4767 10
  /**
   * Reverse the order of the characters of a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The characters of the input in reverse order, an empty string for empty input.</p>
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split into single characters, flip the list and glue it back together
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode($reversed);
  }
4784
4785
  /**
   * Return a portion of haystack relative to the last occurrence of needle, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string to search in.</p>
   * @param string  $needle        <p>The string to search for.</p>
   * @param bool    $before_needle [optional] <p>
   *                               <b>true</b>: return everything from the beginning of haystack
   *                               up to the last occurrence of needle.<br />
   *                               <b>false</b> (default): return everything from the last occurrence
   *                               of needle to the end of haystack.
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use, default is UTF-8.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default charset name needs to be normalized
    $charset = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // the search is done on cleaned copies of both inputs
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
4823 10
4824 10
  /**
   * Find the position of the last occurrence of a string, ignoring case.
   *
   * @param string  $haystack  <p>The string to look in.</p>
   * @param string  $needle    <p>The string to look for.</p>
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />
   *                   If needle is not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // both sides are lowercased, then the case-sensitive search does the work
    $haystackLower = self::strtolower($haystack);
    $needleLower = self::strtolower($needle);

    return self::strrpos($haystackLower, $needleLower, $offset, $encoding, $cleanUtf8);
  }
4842
4843
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into its character
    if (((int)$needle) === $needle && ($needle >= 0)) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if (
        $cleanUtf8 === true
        ||
        $encoding === true // INFO: the "bool"-check is only a fallback for old versions
    ) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters

      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding);
    }

    if (
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
        ||
        self::$support['mbstring'] === true
    ) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // NOTE(review): this branch is guarded by the "iconv" flag but calls
    // grapheme_strrpos() - confirm that this pairing is intended
    if (self::$support['iconv'] === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback: byte-wise search, then translate the byte offset into a character offset

    if ($offset > 0) {
      $haystack = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = self::substr($haystack, 0, $offset);
    }

    // self::substr() returns false when the offset lies beyond the end of the string
    if ($haystack === false) {
      return false;
    }

    if (($pos = strrpos($haystack, $needle)) !== false) {
      $left = substr($haystack, 0, $pos);

      // only a positive offset shifts the reported position
      return ($offset > 0 ? $offset : 0) + self::strlen($left);
    }

    return false;
  }
4931
4932
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start position inside the input string.</p>
   * @param int    $length [optional] <p>Maximum number of characters of the input string to examine.</p>
   *
   * @return int <p>Length of the matching initial segment, 0 if the input or the mask is empty.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = 2147483647)
  {
    // init
    $length = (int)$length;
    $offset = (int)$offset;
    // without this cast a non-string mask (e.g. an integer) fails the isset() check below
    $mask = (string)$mask;

    // only cut the input when a non-default window was requested
    if ($offset || 2147483647 !== $length) {
      $str = self::substr($str, $offset, $length);
    }

    // also turns a "false" coming from self::substr() into an empty string
    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    if (preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
4960
4961
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8     [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    // make sure the support flags are initialised before they are read,
    // the same guard the other position / sub-string helpers use
    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8' // INFO: use "mb_"-function (with polyfill) also if we need another encoding
        ||
        self::$support['mbstring'] === true
    ) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    return \grapheme_strstr($haystack, $needle, $before_needle);
  }
4998
4999
  /**
   * Apply Unicode case folding so that strings can be matched without regard to case.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally apply the full case folding data (default)<br />
   *                           <b>false</b>, use only the limited static array [UTF8::$commonCaseFold]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded and lowercased string, an empty string for empty input.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // search / replace lists of the common folding table, built once per request
    static $foldSearch = null;
    static $foldReplace = null;

    if ($foldSearch === null) {
      $foldSearch = array_keys(self::$commonCaseFold);
      $foldReplace = array_values(self::$commonCaseFold);
    }

    $str = str_replace($foldSearch, $foldReplace, $str);

    if ($full) {

      // full folding data is loaded lazily and kept for later calls
      static $fullFoldMap = null;

      if ($fullFoldMap === null) {
        $fullFoldMap = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = str_replace($fullFoldMap[0], $fullFoldMap[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
5050
5051
  /**
   * Convert a string to lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string  $str       <p>The string being lowercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The lowercased string, an empty string for empty input.</p>
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // work on a cleaned copy of the input
      $str = self::clean($str);
    }

    // only a non-default charset name needs to be normalized
    $charset = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding);

    return \mb_strtolower($str, $charset);
  }
5083
5084 43
  /**
   * Case sensitive folding for collation matching: the input is normalized
   * to NFD and every run of "\p{Mn}" characters is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
5096 43
5097 2
  /**
   * Convert a string to uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string  $str       <p>The string being uppercased.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The uppercased string, an empty string for empty input.</p>
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // work on a cleaned copy of the input
      $str = self::clean($str);
    }

    // only a non-default charset name needs to be normalized
    $charset = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding);

    return \mb_strtoupper($str, $charset);
  }
5128
5129
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>
   *                              The characters to replace (string or list of strings).<br />
   *                              If $to is omitted, this value is handed to PHP's strtr() unchanged
   *                              as its second argument.
   *                              </p>
   * @param string|string[] $to   [optional] <p>The replacements (string or list of strings).</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    if (INF !== $to) {
      // self::str_split() is only used for strings; arrays are taken as they are
      if (!is_array($from)) {
        $from = self::str_split($from);
      }

      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // both lists are cut to the length of the shorter one, so that every
      // search entry has exactly one replacement
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    return strtr($str, $from);
  }
5162
5163 1
  /**
   * Return the width of a string as reported by mb_strwidth().
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default charset name needs to be normalized
    $charset = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding);

    if ($cleanUtf8 === true) {
      // measure a cleaned copy of the input
      $str = self::clean($str);
    }

    return \mb_strwidth($str, $charset);
  }
5187
5188
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $start     <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>
   *                      The sub-string specified by the start and length parameters,<br />
   *                      an empty string for empty input,<br />
   *                      or false if start is greater than the length of the string.
   *                      </p>
   */
  public static function substr($str, $start = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding,
      // so the work is done on a cleaned copy
      $str = self::clean($str);
    }

    // the character count is only needed for the range check
    // and as the default length
    $charCount = 0;
    if ($start || $length === null) {
      $charCount = (int)self::strlen($str);
    }

    if ($start && $start > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    if (!isset(self::$support['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding !== 'UTF-8' && !is_bool($encoding)) {
      $encoding = self::normalize_encoding($encoding);
    } else {
      $encoding = 'UTF-8';
    }

    if (self::$support['mbstring'] === true) {
      return \mb_substr($str, $start, $length, $encoding);
    }

    if (self::$support['iconv'] === true) {
      return \iconv_substr($str, $start, $length, $encoding);
    }

    // fallback: split into single characters, take the requested slice
    // and join it back into a string
    $chars = self::split($str);
    $slice = array_slice($chars, $start, $length);

    return implode($slice);
  }
5262 6
5263 4
  /**
5264 4
   * Binary safe comparison of two strings from an offset, up to length characters.
5265
   *
5266 6
   * @param string  $main_str           <p>The main string being compared.</p>
5267
   * @param string  $str                <p>The secondary string being compared.</p>
5268 6
   * @param int     $offset             <p>The start position for the comparison. If negative, it starts counting from
5269
   *                                    the end of the string.</p>
5270
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
5271
   *                                    the length of the str compared to the length of main_str less the offset.</p>
5272
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
5273
   *                                    insensitive.</p>
5274
   *
5275
   * @return int
5276
   */
5277
  public static function substr_compare($main_str, $str, $offset, $length = 2147483647, $case_insensitivity = false)
5278
  {
5279
    $main_str = self::substr($main_str, $offset, $length);
5280 1
    $str = self::substr($str, 0, self::strlen($main_str));
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5279 can also be of type false; however, voku\helper\UTF8::strlen() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condition.

Consider the following example:

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5281
5282 1
    return $case_insensitivity === true ? self::strcasecmp($main_str, $str) : self::strcmp($main_str, $str);
0 ignored issues
show
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5279 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5280 can also be of type false; however, voku\helper\UTF8::strcasecmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $main_str defined by self::substr($main_str, $offset, $length) on line 5279 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
Security Bug introduced by
It seems like $str defined by self::substr($str, 0, self::strlen($main_str)) on line 5280 can also be of type false; however, voku\helper\UTF8::strcmp() does only seem to accept string, did you maybe forget to handle an error condition?

This check looks for type mismatches where the missing type is false. This is usually indicative of an error condtion.

Consider the follow example

<?php

function getDate($date)
{
    if ($date !== null) {
        return new DateTime($date);
    }

    return false;
}

This function either returns a new DateTime object or false, if there was an error. This is a typical pattern in PHP programming to show that an error has occurred without raising an exception. The calling code should check for this returned false before passing on the value to another function or method that may not be able to handle a false.

Loading history...
5283
  }
5284 1
5285 1
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string   $haystack  <p>The string to search in.</p>
   * @param string   $needle    <p>The substring to search for.</p>
   * @param int      $offset    [optional] <p>The offset where to start counting.</p>
   * @param int|null $length    [optional] <p>
   *                            The maximum length after the specified offset to search for the
   *                            substring. If it is omitted (null), the search runs until the end
   *                            of the haystack.
   *                            </p>
   * @param string   $encoding  <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to count in (or for) an empty string
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length) {
      $offset = (int)$offset;

      // Guard for PHP < 7.1 (kept as it was): a missing length counts as 0 here.
      if (
          (int)$length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // A missing length must stay null ("until the end of the string"): casting
      // it to 0 would cut an empty slice and an offset-only call would always
      // count 0 occurrences.
      if ($length !== null) {
        $length = (int)$length;
      }

      $haystack = self::substr($haystack, $offset, $length, $encoding);

      // "UTF8::substr()" can return false, there is nothing to count in that case
      if ($haystack === false) {
        $haystack = '';
      }
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters from both strings before they are compared
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_substr_count($haystack, $needle, $encoding);
  }
5340 1
5341
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, the replacement is done for every element: $replacement, $start and $length
   * are either used for all elements (scalar) or element by element (array). A missing $length means
   * "0" (insert only) in array mode and "until the end of the string" in string mode.
   *
   * @param string|string[] $str         <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $start       <p>The character position where the replacement begins.</p>
   * @param int|int[]|null  $length      [optional] <p>The number of characters that are replaced.</p>
   *
   * @return string|string[]
   */
  public static function substr_replace($str, $replacement, $start, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // $replacement: one entry per input string
      if (is_array($replacement)) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // $start: everything that is not a real integer falls back to 0
      if (is_array($start)) {
        $start = array_slice($start, 0, $num);
        foreach ($start as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $start = array_pad(array($start), $num, $start);
      }

      // $length: missing => 0, non-integer entries => number of input strings
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // Recursive call, once per input string
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $start, $length);
    }

    // a single input string can only take a single replacement: use the first one
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // split both strings into arrays of UTF-8 characters
    preg_match_all('/./us', (string)$str, $smatches);
    preg_match_all('/./us', (string)$replacement, $rmatches);

    if ($length === null) {
      // "until the end": the number of split characters is always enough and,
      // unlike "\mb_strlen()", does not depend on the mbstring internal encoding
      $length = count($smatches[0]);
    }

    array_splice($smatches[0], $start, $length, $rmatches[0]);

    // separator first: the reversed argument order is deprecated since PHP 7.4
    return implode('', $smatches[0]);
  }
5416 1
5417
  /**
   * Swap the case of every non-whitespace character of the string.
   *
   * A character that is equal to its upper-case form is converted to lower-case, every other
   * character is converted to upper-case.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding);
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before the string is processed character-wise
      $input = self::clean($input);
    }

    return preg_replace_callback(
        '/[\S]/u',
        function ($hit) use ($encoding) {
          $char = $hit[0];
          $upper = UTF8::strtoupper($char, $encoding);

          // already upper-case (or without case) => lower it, otherwise take the upper-case form
          return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
        },
        $input
    );
  }
5460
5461
  /**
   * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed as second argument ("$unknown") to "UTF8::to_ascii()".</p>
   * @param bool   $strict    <p>Passed as third argument ("$strict") to "UTF8::to_ascii()".</p>
   *
   * @return string
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
5476
5477
  /**
   * Alias of "UTF8::to_iso8859()": the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
5490
5491 5
  /**
   * Alias of "UTF8::to_latin1()": the argument is passed through unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
5504
5505
  /**
   * Alias of "UTF8::to_utf8()": the argument is passed through unchanged.
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
5518
5519 1
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first and returned as it is if it is plain ASCII already. Otherwise every
   * non-ASCII character is decoded to its code point and looked up in a per-bank replacement table
   * (code point >> 8) that is loaded on demand from the "data" directory next to this file;
   * characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   *
   * @throws \Exception <p>If $strict is used, but Intl is not supported or PHP is older than 5.4.</p>
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // Cache of the replacement tables, indexed by bank.
    // NOTE(review): the required bank files presumably fill "$UTF8_TO_ASCII[$bank]"
    // by name - do not rename this variable without checking the data files.
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = self::clean($str, false, true, true);

    // check if we only have ASCII
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$support['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$support['intl'] === true && Bootup::is_php('5.4')) {
        $str = transliterator_transliterate('Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      } else {
        throw new \Exception('Intl is not supported or you use PHP < 5.4!');
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // The value is reset for every character: otherwise a character that
      // cannot be decoded would silently reuse the code point of a previous one.
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // no valid lead byte (128-191, 254, 255) => unknown character
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the replacement table of this bank, if it is not cached already
      $bank = $ord >> 8;
      if (!array_key_exists($bank, (array)$UTF8_TO_ASCII)) {
        $bankfile = __DIR__ . '/data/' . sprintf('x%02x', $bank) . '.php';
        if (file_exists($bankfile)) {
          /** @noinspection PhpIncludeInspection */
          require $bankfile;
        } else {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;
      if (array_key_exists($newchar, $UTF8_TO_ASCII[$bank])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the loop variable
    unset($c);

    return implode('', $chars);
  }
5647
5648 4
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively, keys are kept); every other value is cast
   * to a string and, unless it is empty, passed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str)) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    return self::utf8_decode($input);
  }
5677
5678
  /**
   * Alias of "UTF8::to_iso8859()": the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
5691 26
5692 5
  /**
   * This function leaves UTF8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * - It decodes unicode escape sequences ("\uXXXX") and decimal HTML entities ("&#NNN;").
   *
   * - It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.
   *
   * - It may fail to convert characters to UTF-8 if they fall into one of these scenarios:
   *
   * 1) when any of these characters:   ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞß
   *    are followed by any of these:  ("group B")
   *                                    ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶•¸¹º»¼½¾¿
   * For example:   %ABREPRESENT%C9%BB. «REPRESENTÉ»
   * The "«" (%AB) character will be converted, but the "É" followed by "»" (%C9%BB)
   * is also a valid unicode character, and will be left unchanged.
   *
   * 2) when any of these: àáâãäåæçèéêëìíîï  are followed by TWO chars from group B,
   * 3) when any of these: ðñòó  are followed by THREE chars from group B.
   *
   * @param string|string[] $str <p>Any string or array.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str)
  {
    if (is_array($str)) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    // the string is processed byte by byte, so the byte length is needed here
    $max = strlen($str);
    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];
      $ordC1 = ord($c1);

      if ($ordC1 >= 0xC0) { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes that a UTF-8 sequence with this lead byte needs
        if ($ordC1 <= 0xDF) {
          $expected = 1; // looks like 2 bytes UTF8
        } elseif ($ordC1 <= 0xEF) {
          $expected = 2; // looks like 3 bytes UTF8
        } elseif ($ordC1 <= 0xF7) {
          $expected = 3; // looks like 4 bytes UTF8
        } else {
          $expected = 0; // doesn't look like UTF8, but should be converted
        }

        // collect the continuation bytes (0x80-0xBF), a missing byte counts as "\x00"
        $sequence = $c1;
        $isUtf8 = $expected > 0;
        for ($j = 1; $j <= $expected; $j++) {
          $next = $i + $j < $max ? $str[$i + $j] : "\x00";
          if ((ord($next) & 0xC0) !== 0x80) {
            $isUtf8 = false;
            break;
          }
          $sequence .= $next;
        }

        if ($isUtf8 === true) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $expected;
        } else { // not valid UTF8 - convert the single byte from ISO-8859-1
          // integer shift instead of "ord() / 64": a fractional float as
          // "chr()" argument is deprecated since PHP 8.1
          $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
        }

      } elseif (($ordC1 & 0xC0) === 0x80) { // needs conversion

        if (isset(self::$win1252ToUtf8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$win1252ToUtf8[$ordC1];
        } else {
          $buf .= chr(0xC0 | ($ordC1 >> 6)) . chr(0x80 | ($ordC1 & 0x3F));
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode decimal HTML entities with 2 to 4 digits
    $buf = preg_replace_callback(
        '/&#\d{2,4};/',
        function ($match) {
          return \mb_convert_encoding($match[0], 'UTF-8', 'HTML-ENTITIES');
        },
        $buf
    );

    return $buf;
  }
5821
5822 1
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * Without $chars (or with an empty value) all leading and trailing characters of the unicode
   * categories "Z" (separators) and "C" (control characters and other) are removed, otherwise the
   * work is delegated to "UTF8::ltrim()" and "UTF8::rtrim()".
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $subject = (string)$str;

    if ($subject === '') {
      return '';
    }

    // INF is the marker for "no characters given"
    $hasCustomChars = $chars !== INF && $chars;

    if ($hasCustomChars) {
      $leftTrimmed = self::ltrim($subject, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $subject);
  }
5850
5851
  /**
   * Makes string's first char uppercase.
   *
   * The first character and the remainder are cut with "UTF8::substr()", only the first character is
   * passed through "UTF8::strtoupper()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $firstChar = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // "UTF8::substr()" can return false; cast both parts, so that
    // "UTF8::strtoupper()" and the concatenation only ever see strings.
    return self::strtoupper((string)$firstChar, $encoding, $cleanUtf8) . (string)$remainder;
  }
5864
5865
  /**
   * Alias of "UTF8::ucfirst()": all arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean $cleanUtf8 [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
5880
5881
  /**
   * Uppercase for all words in the string.
   *
   * The string is split into words (runs of letters plus the chars from $charlist, optionally joined
   * by a dash or an apostrophe) and the text between them. Every part that is not listed in
   * $exceptions is passed to "UTF8::ucfirst()", then all parts are joined again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset for e.g. "\mb_" function.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Clean non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // check for the empty string explicitly: a loose "!$str" also matches "0"
    if (!isset($str[0])) {
      return '';
    }

    $charlist = self::rxClass($charlist, '\pL');
    $words = \preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // the split failed (e.g. invalid UTF-8), so there are no words to process
    if ($words === false) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newwords = array();

    foreach ($words as $word) {

      // skip only the empty parts of the split: a loose "!$word" would also
      // drop a "0" that stands between two words
      if ($word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newwords[] = $word;
    }

    return implode('', $newwords);
  }
5931
5932
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * Every pass runs "UTF8::to_utf8()", "UTF8::html_entity_decode()", "rawurldecode()" and
   * "UTF8::fix_simple_utf8()"; with $multi_decode the passes are repeated until the string does not
   * change anymore.
   *
   * e.g:
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" sequences as hexadecimal HTML entities, they are decoded in the loop below
    $str = preg_replace('/%u([0-9a-f]{3,4})/i', '&#x\\1;', urldecode($str));

    if (Bootup::is_php('5.4')) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

      // stop after the first pass, or as soon as a pass did not change anything
    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
5978
5979
  /**
   * Returns an array that maps "urlencoded" Windows-1252 bytes ("%20" - "%FF") to UTF-8 strings.
   *
   * Notes on special entries:
   * - "%80" is the EURO SIGN in Windows-1252.
   * - "%A0" (NO-BREAK SPACE) and "%AD" (SOFT HYPHEN) are invisible characters, therefore they are
   *   written as explicit byte escapes instead of literal characters.
   * - "%7F" (DEL) and the code points without a Windows-1252 assignment
   *   ("%81", "%8D", "%8F", "%90", "%9D") map to an empty string.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array <p>Upper-case "%XX" sequences as keys, the decoded UTF-8 string as values.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    static $array = array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '', // DEL (control character) is dropped
        '%80' => '€', // EURO SIGN
        '%81' => '', // not assigned in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // not assigned in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // not assigned in Windows-1252
        '%90' => '', // not assigned in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // not assigned in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );

    return $array;
  }
6217
6218 8
  /**
   * Converts a string to ISO-8859-1 after normalizing it to UTF-8.
   *
   * The input is passed through "UTF8::to_utf8()", then every key of the internal
   * "$utf8ToWin1252" table is replaced by its value, and the result is handed to
   * the Xml-polyfill "utf8_decode()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // The search / replace lists are built once per request and then reused.
    static $searchList = null;
    static $replaceList = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $input = (string)self::to_utf8($input);

    if ($searchList === null) {
      $searchList = array_keys(self::$utf8ToWin1252);
      $replaceList = array_values(self::$utf8ToWin1252);
    }

    $mapped = str_replace($searchList, $replaceList, $input);

    /** @noinspection PhpInternalEntityUsedInspection */
    return Xml::utf8_decode($mapped);
  }
6247 6
6248 6
  /**
   * Converts an ISO-8859-1 string to UTF-8.
   *
   * The native "\utf8_encode()" does the conversion; if the result contains the byte "\xC2",
   * every key of the internal "$cp1252ToUtf8" table is additionally replaced by its value.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // The search / replace lists are built once per request and then reused.
    static $searchList = null;
    static $replaceList = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $encoded = \utf8_encode($input);

    // Without a "\xC2" byte none of the table replacements is attempted.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($searchList === null) {
      $searchList = array_keys(self::$cp1252ToUtf8);
      $replaceList = array_values(self::$cp1252ToUtf8);
    }

    return str_replace($searchList, $replaceList, $encoded);
  }
6281
6282
  /**
   * Alias of "UTF8::fix_simple_utf8()": the input is forwarded unchanged and the result returned as-is.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
6295
6296
  /**
   * Returns the internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$whitespaceTable;

    return $table;
  }
6312
6313
  /**
   * Cuts a string after a maximum number of words.
   *
   * A "word" is a run of non-whitespace characters. Leading whitespace is kept, whitespace
   * at the cut position is trimmed before the add-on is appended. If the string does not
   * contain more words than allowed it is returned unchanged (without the add-on).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $words    <p>The limit of words as integer; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $words = 100, $strAddOn = '...')
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    $limit = (int)$words;

    if ($limit < 1) {
      return '';
    }

    // Optional leading whitespace, then between 1 and $limit "word + trailing whitespace" groups.
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $text, $found);

    // Nothing matched, or the match already covers the whole string: nothing to cut off.
    $nothingToCut = !isset($found[0]) || self::strlen($text) === self::strlen($found[0]);

    if ($nothingToCut) {
      return $text;
    }

    return self::rtrim($found[0]) . $strAddOn;
  }
6348
6349
  /**
   * Wraps a string to a given number of characters.
   *
   * The native "wordwrap()" is not run on the text itself but on a placeholder "mask" that holds
   * exactly one byte per element returned by "UTF8::split()": "?" for a non-space element, " " for a
   * space and "#" for an already existing break. The "#" positions of the wrapped mask are then used
   * to re-assemble the result from the original elements.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column; an empty string if the input
   *                or the break is empty.
   *                </p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $text = (string)$str;
    $delimiter = (string)$break;

    if ($text === '' || $delimiter === '') {
      return '';
    }

    $mask = '';
    $glyphs = array();
    $segments = explode($delimiter, $text);
    $segmentCount = count($segments);

    // Build the list of elements and the placeholder mask side by side.
    /** @noinspection ForeachInvariantsInspection */
    for ($n = 0; $n < $segmentCount; ++$n) {

      // Every segment but the first one was preceded by a break in the input.
      if ($n > 0) {
        $glyphs[] = $delimiter;
        $mask .= '#';
      }

      $segment = $segments[$n];
      unset($segments[$n]);

      foreach (self::split($segment) as $glyph) {
        $glyphs[] = $glyph;

        if ($glyph === ' ') {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    $wrappedMask = wordwrap($mask, $width, '#', $cut);

    $result = '';
    $glyphIndex = 0;
    $maskIndex = -1;
    $markerPos = -1;

    while (true) {
      $markerPos = self::strpos($wrappedMask, '#', $markerPos + 1);

      if ($markerPos === false) {
        break;
      }

      // Copy everything that lies before the current break marker.
      ++$maskIndex;
      while ($maskIndex < $markerPos) {
        $result .= $glyphs[$glyphIndex];
        unset($glyphs[$glyphIndex]);
        ++$glyphIndex;
        ++$maskIndex;
      }

      // The marker replaced an original break or a space: drop that element.
      // Otherwise it was inserted in addition (e.g. a cut inside a word) and no element is skipped.
      if ($delimiter === $glyphs[$glyphIndex] || ' ' === $glyphs[$glyphIndex]) {
        unset($glyphs[$glyphIndex]);
        ++$glyphIndex;
      }

      $result .= $delimiter;
    }

    // Whatever is left belongs to the last line.
    return $result . implode('', $glyphs);
  }
6416
6417
  /**
   * Returns the internal list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$whitespace;

    return $whitespace;
  }
6426
6427
}
6428