Completed
Push — master ( 5343dd...690c42 )
by Lars
12:33 queued 02:01
created

UTF8::normalizeEncoding()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 2
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
   * Broken byte sequence => intended UTF-8 character.
   *
   * Each key is the byte sequence obtained when the UTF-8 bytes of the value
   * are read as WINDOWS-1252 and encoded as UTF-8 again. The keys are written
   * as byte escapes so that they survive editors and tools that re-encode the file.
   *
   * NOTE(review): the keys were reconstructed from the values; bytes that are
   * undefined in WINDOWS-1252 (0x9D) are assumed to map to the C1 control of
   * the same value and 0xA0 to NO-BREAK SPACE - confirm against upstream.
   *
   * @var array
   */
  private static $CP1252_TO_UTF8 = array(
      "\xc3\xa2\xe2\x80\x9a\xc2\xac"     => '€',
      "\xc3\xa2\xe2\x82\xac\xc5\xa1"     => '‚',
      "\xc3\x86\xe2\x80\x99"             => 'ƒ',
      "\xc3\xa2\xe2\x82\xac\xc5\xbe"     => '„',
      "\xc3\xa2\xe2\x82\xac\xc2\xa6"     => '…',
      "\xc3\xa2\xe2\x82\xac\xc2\xa0"     => '†',
      "\xc3\xa2\xe2\x82\xac\xc2\xa1"     => '‡',
      "\xc3\x8b\xe2\x80\xa0"             => 'ˆ',
      "\xc3\xa2\xe2\x82\xac\xc2\xb0"     => '‰',
      "\xc3\x85\xc2\xa0"                 => 'Š',
      "\xc3\xa2\xe2\x82\xac\xc2\xb9"     => '‹',
      "\xc3\x85\xe2\x80\x99"             => 'Œ',
      "\xc3\x85\xc2\xbd"                 => 'Ž',
      "\xc3\xa2\xe2\x82\xac\xcb\x9c"     => '‘',
      "\xc3\xa2\xe2\x82\xac\xe2\x84\xa2" => '’',
      "\xc3\xa2\xe2\x82\xac\xc5\x93"     => '“',
      "\xc3\xa2\xe2\x82\xac\xc2\x9d"     => '”',
      "\xc3\xa2\xe2\x82\xac\xc2\xa2"     => '•',
      "\xc3\xa2\xe2\x82\xac\xe2\x80\x9c" => '–',
      "\xc3\xa2\xe2\x82\xac\xe2\x80\x9d" => '—',
      "\xc3\x8b\xc5\x93"                 => '˜',
      "\xc3\xa2\xe2\x80\x9e\xc2\xa2"     => '™',
      "\xc3\x85\xc2\xa1"                 => 'š',
      "\xc3\xa2\xe2\x82\xac\xc2\xba"     => '›',
      "\xc3\x85\xe2\x80\x9c"             => 'œ',
      "\xc3\x85\xc2\xbe"                 => 'ž',
      "\xc3\x85\xc2\xb8"                 => 'Ÿ',
  );
83
84
  /**
   * Bom => Byte-Length
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * The "as WINDOWS-1252" keys are the respective BOM bytes after they were
   * read as WINDOWS-1252 and encoded as UTF-8; their value is the byte length
   * of that longer sequence.
   *
   * @var array
   */
  private static $BOM = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      // written as byte escapes: a literal key is easily lost or turned into a real BOM by editors
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      // NOTE(review): the two leading characters render as spaces here - confirm they are not meant to be NUL bytes
      '  þÿ'                     => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      // NOTE(review): same question for the two trailing characters
      'ÿþ  '                     => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      'þÿ'                       => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      'ÿþ'                       => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Creates an instance and makes sure the environment detection has run.
   *
   * The detection itself is delegated to UTF8::checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
820
821
  /**
   * Fetch the character at the given position: $str[1] like functionality.
   *
   * An empty input string or a negative position yields an empty string;
   * everything else is delegated to UTF8::substr() with a length of 1.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    $index = (int)$pos;

    return $index < 0 ? '' : (string)self::substr($input, $index, 1);
  }
845
846
  /**
   * Returns the string with the UTF-8 BOM in front of it.
   *
   * INFO: The input is returned untouched unless UTF8::string_has_bom()
   *       reports (strictly) false for it.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
   * Convert a string of binary digits into the bytes it represents.
   *
   * The digits are decoded one octet at a time, so the input may be of any
   * length. If the number of digits is not a multiple of eight, the input is
   * padded with leading zeros (the first byte is treated as having lost its
   * leading zeros).
   *
   * @param mixed $bin <p>String consisting of the digits 1|0.</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty input.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    $bits = (string)$bin;

    // complete the most significant octet, otherwise every following byte would be shifted
    $missing = (8 - (strlen($bits) % 8)) % 8;
    if ($missing > 0) {
      $bits = str_repeat('0', $missing) . $bits;
    }

    // decode per octet: converting the whole number at once goes through a
    // float and silently loses precision for long inputs
    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= chr((int)bindec($octet));
    }

    return $bytes;
  }
879
880
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
891
892
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
   * Detects which UTF-8 related features the server environment offers and
   * stores the findings in UTF8::$SUPPORT.
   *
   * The detection only runs as long as the marker key
   * "already_checked_via_portable_utf8" is not set; the marker is set as the
   * very first step.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // true only if the overload constant exists and its bit is set in the ini setting
    self::$SUPPORT['mbstring_func_overload'] = defined('MB_OVERLOAD_STRING')
                                               &&
                                               (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    $hasIntl = self::intl_loaded();
    self::$SUPPORT['intl'] = $hasIntl;

    // the id list stays an empty array unless intl and the list function are both available
    $canListIds = $hasIntl === true && function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListIds ? transliterator_list_ids() : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * If "\IntlChar" is available the character is produced by \IntlChar::chr(),
   * otherwise the UTF-8 bytes are assembled by hand. Results are kept in a
   * function-static cache keyed by code point and encoding.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.
   *                     Without "\IntlChar" that is: a non-integer code point or a code point
   *                     outside of 0 - 0x10FFFF.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $str = \IntlChar::chr($code_point);

      if ($encoding !== 'UTF-8') {
        $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
      }

      // add into static cache
      $CHAR_CACHE[$cacheKey] = $str;

      return $str;
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    if ((int)$code_point !== $code_point) {
      $CHAR_CACHE[$cacheKey] = null;

      return null;
    }

    // code points outside of the Unicode range cannot be encoded: the byte
    // arithmetic below would wrap around and emit an invalid sequence
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1033
1034 1
  /**
   * Casts the given value to an integer and returns the single byte that
   * PHP's native chr() produces for it.
   *
   * @param int $int
   *
   * @return string
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1043
1044
  /**
   * Runs a callback over every character of a string and collects the results.
   *
   * The string is first broken into characters via UTF8::split(); the pieces are then passed to array_map().
   *
   * @param string|array $callback <p>The callback to invoke for each character.</p>
   * @param string       $str      <p>The UTF-8 string whose characters are handed to the callback.</p>
   *
   * @return array <p>The callback results, as returned by array_map().</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1058
1059 4
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * Expected sizes for UTF-8:
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string (cast to string before use).</p>
   *
   * @return array <p>The byte length of each character; an empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // nothing to measure
    if ($str === '') {
      return array();
    }

    // "8BIT" makes UTF8::strlen() count raw bytes instead of characters
    $byteLength = function ($character) {
      return self::strlen($character, '8BIT');
    };

    return array_map($byteLength, self::split($str));
  }
1086
1087 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the sequence (1-4); the payload bits of the lead byte
   * and of each following byte are then shifted together into one integer.
   *
   * @param string $char <p>The input character (cast to string before use).</p>
   *
   * @return int <p>The decoded value; 0 for an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // An empty string has no lead byte: reading $char[0] would raise an
    // "Uninitialized string offset" notice/warning, so bail out first.
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx => single byte, the value is already the result
      return $code;
    }

    // Stays at 1 when the lead byte matches none of the patterns below,
    // in which case the loop is skipped and $code is returned as-is.
    $bytes = 1;

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx => two-byte sequence
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx => three-byte sequence
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx => four-byte sequence
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx => strip the marker bit; a byte missing from a truncated
      // sequence contributes no payload bits instead of triggering an
      // out-of-range string offset read.
      $payload = isset($char[$i - 1]) ? (self::ord($char[$i - 1]) & ~0x80) : 0;

      $code = ($code << 6) + $payload;
    }

    return $code;
  }
1126
1127 1
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character (cast to string before use).</p>
   * @param string $pfix [optional] <p>The prefix handed to UTF8::int_to_hex(), "U+" by default.</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // the literal NUL entity is looked up as an empty string
    $lookup = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($lookup), $pfix);
  }
1149
1150
  /**
   * Alias of "UTF8::chr_to_decimal()": forwards the argument and returns that method's result unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the character.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1163
1164
  /**
   * Splits a string into chunks via UTF8::split() and glues them back together with the given line ending.
   *
   * INFO: the pieces are joined with implode(), so the line ending is placed between the chunks
   *       and not appended after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) used to join the chunks.</p>
   *
   * @return string <p>The chunked string.</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1177
1178
  /**
   * Strips byte sequences that do not look like UTF-8 from a string and optionally applies extra clean-ups.
   *
   * Steps, in this order: drop invalid bytes, remove the diamond question mark, remove invisible characters,
   * then (each only if requested) normalize whitespace, normalize MS Word characters, remove the BOM.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // Based on: http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // The repetition is capped at {1,100} because an unbounded one
    // caused connection reset problems on larger strings.
    //
    // Only group 1 (well-formed sequences) is written back by the
    // replacement; bytes matched by groups 2 and 3 are dropped.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($pattern, '$1', $str);

    // unconditional clean-ups
    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    // optional clean-ups, only when explicitly switched on with "true"
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1226
1227
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors first, then runs UTF8::clean() on the result.
   *
   * @param string $str <p>The input string (cast to string before use).</p>
   *
   * @return string <p>The cleaned string, or an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // repair ISO <-> UTF-8 mix-ups before anything is stripped
    $repaired = self::fix_simple_utf8($str);

    // UTF8::clean() is called with:
    //   $remove_bom              = true
    //   $normalize_whitespace    = true
    //   $normalize_msword        = false
    //   $keep_non_breaking_space = true
    // and additionally removes non UTF-8 bytes, the diamond question mark (�)
    // and invisible characters (e.g. "\0").
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
1254 7
1255 7
  /**
   * Converts a string (or an array of strings) into an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings; a string is split
   *                                    into characters first, an array is used as given.</p>
   * @param bool            $u_style    <p>If true, every code point is additionally passed through
   *                                    UTF8::int_to_hex() (U+xxxx format); by default integers are returned.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $characters = (is_string($arg) === true) ? self::split($arg) : $arg;

    // __CLASS__ resolves to this class, so the callables point at UTF8::ord() / UTF8::int_to_hex()
    $codePoints = array_map(array(__CLASS__, 'ord'), $characters);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(array(__CLASS__, 'int_to_hex'), $codePoints);
  }
1292
1293
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on to UTF8::split(): remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array with the characters as keys and
   *               their number of occurrences as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array entry per character (chunk length 1)
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1306
1307
  /**
   * Converts an int-value into a character by decoding the numeric HTML entity "&#<int>;".
   *
   * @param mixed $int <p>The value that is placed into the numeric entity.</p>
   *
   * @return string <p>The result of UTF8::html_entity_decode() for that entity.</p>
   */
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1324 5
1325
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call it on a string that is already UTF-8 without messing the string up.
   *
   * @param string $encoding <p>The target encoding, e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string.</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise the string is only converted when the auto-detected
   *                         encoding differs from the target encoding.</p>
   *
   * @return string <p>The converted string, or the input string when nothing was (or could be) converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do without a string or without a target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // detection failed => leave the string untouched
    if ($encodingDetected === false) {
      return $str;
    }

    $forced = ($force === true);

    // not forced and already in the target encoding => leave the string untouched
    if ($forced === false && $encodingDetected === $encoding) {
      return $str;
    }

    // dedicated converter for the UTF-8 target
    if (
        $encoding === 'UTF-8'
        &&
        (
            $forced
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // dedicated converter for the ISO-8859-1 target
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $forced
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // every other target goes through mb_convert_encoding(); warn if "mbstring" is flagged as unsupported
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // fall back to the input when the conversion produced a falsy result
    return $strEncoded ? $strEncoded : $str;
  }
1415
1416
  /**
1417
   * Reads entire file into a string.
1418
   *
1419
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1420
   *
1421
   * @link http://php.net/manual/en/function.file-get-contents.php
1422
   *
1423
   * @param string        $filename      <p>
1424
   *                                     Name of the file to read.
1425
   *                                     </p>
1426
   * @param int|false     $flags         [optional] <p>
1427
   *                                     Prior to PHP 6, this parameter is called
1428
   *                                     use_include_path and is a bool.
1429
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1430
   *                                     to trigger include path
1431
   *                                     search.
1432
   *                                     </p>
1433
   *                                     <p>
1434
   *                                     The value of flags can be any combination of
1435
   *                                     the following flags (with some restrictions), joined with the
1436
   *                                     binary OR (|)
1437
   *                                     operator.
1438
   *                                     </p>
1439
   *                                     <p>
1440
   *                                     <table>
1441
   *                                     Available flags
1442
   *                                     <tr valign="top">
1443
   *                                     <td>Flag</td>
1444
   *                                     <td>Description</td>
1445
   *                                     </tr>
1446
   *                                     <tr valign="top">
1447
   *                                     <td>
1448
   *                                     FILE_USE_INCLUDE_PATH
1449
   *                                     </td>
1450
   *                                     <td>
1451
   *                                     Search for filename in the include directory.
1452
   *                                     See include_path for more
1453
   *                                     information.
1454
   *                                     </td>
1455
   *                                     </tr>
1456
   *                                     <tr valign="top">
1457
   *                                     <td>
1458
   *                                     FILE_TEXT
1459
   *                                     </td>
1460
   *                                     <td>
1461
   *                                     As of PHP 6, the default encoding of the read
1462
   *                                     data is UTF-8. You can specify a different encoding by creating a
1463
   *                                     custom context or by changing the default using
1464
   *                                     stream_default_encoding. This flag cannot be
1465
   *                                     used with FILE_BINARY.
1466
   *                                     </td>
1467
   *                                     </tr>
1468
   *                                     <tr valign="top">
1469
   *                                     <td>
1470
   *                                     FILE_BINARY
1471
   *                                     </td>
1472
   *                                     <td>
1473
   *                                     With this flag, the file is read in binary mode. This is the default
1474
   *                                     setting and cannot be used with FILE_TEXT.
1475
   *                                     </td>
1476
   *                                     </tr>
1477
   *                                     </table>
1478 3
   *                                     </p>
1479
   * @param resource|null $context       [optional] <p>
1480
   *                                     A valid context resource created with
1481 3
   *                                     stream_context_create. If you don't need to use a
1482 3
   *                                     custom context, you can skip this parameter by passing <b>NULL</b>.
1483
   *                                     </p>
1484 3
   * @param int|null $offset             [optional] <p>
1485 2
   *                                     The offset where the reading starts.
1486
   *                                     </p>
1487
   * @param int|null $maxLength          [optional] <p>
1488
   *                                     Maximum length of data read. The default is to read until end
1489 2
   *                                     of file is reached.
1490 2
   *                                     </p>
1491
   * @param int      $timeout            <p>The time in seconds for the timeout.</p>
1492 2
   *
1493 2
   * @param boolean  $convertToUtf8      <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1494
   *                                     or pdf, because they used non default utf-8 chars</p>
1495 3
   *
1496 3
   * @return string|false <p>The read data, or <strong>false</strong> on failure.</p>
1497 3
   */
1498
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() returns false when the value cannot be sanitized (e.g. an
    // array was passed), and the sanitized name may end up empty. Handing
    // either to the native file_get_contents() means reading an empty path,
    // which can never succeed (and throws a ValueError as of PHP 8), so
    // report the failure with the documented "false" right away.
    if ($filename === false || $filename === '') {
      return false;
    }

    // Only build a default stream context (with an HTTP timeout) when the
    // caller did not supply one and a non-zero timeout was requested.
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    // normalize "not set" values to what the native function expects
    if (!$flags) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    // the length argument is only passed on when it is a real integer
    if (is_int($maxLength) === true) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxLength);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // "false" => do not force: UTF8::encode() decides based on the detected encoding
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1541
1542
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with the native file_get_contents() and the content is handed
   * to UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // An unreadable file yields false instead of a string; do not pass that
    // boolean on as if it were file content.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1553 2
1554 2
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are walked recursively (every element / property is filtered in place),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form passed to \Normalizer, 4 (NFC) by default.</p>
   * @param string $leading_combining  <p>Prepended to a non-ASCII string that starts with a combining mark.</p>
   *
   * @return mixed <p>The filtered value.</p>
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $var[$k] = self::filter($v, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $k => $v) {
        $var->{$k} = self::filter($v, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    // unify line endings first: "\r\n" and "\r" both become "\n"
    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // the remaining steps only apply to strings that are not reported as ASCII
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // marker value: keeps the isset($n[0]) check below true for already normalized input
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // no usable normalizer output => fall back to a forced UTF-8 encode
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($n[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1617
1618
  /**
1619
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1620
   *
1621
   * Gets a specific external variable by name and optionally filters it
1622
   *
1623
   * @link  http://php.net/manual/en/function.filter-input.php
1624
   *
1625
   * @param int    $type          <p>
1626
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1627
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1628
   *                              <b>INPUT_ENV</b>.
1629
   *                              </p>
1630
   * @param string $variable_name <p>
1631
   *                              Name of a variable to get.
1632
   *                              </p>
1633
   * @param int    $filter        [optional] <p>
1634
   *                              The ID of the filter to apply. The
1635
   *                              manual page lists the available filters.
1636
   *                              </p>
1637
   * @param mixed  $options       [optional] <p>
1638
   *                              Associative array of options or bitwise disjunction of flags. If filter
1639
   *                              accepts options, flags can be provided in "flags" field of array.
1640
   *                              </p>
1641
   *
1642
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1643
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1644
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1645
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1646
   * @since 5.2.0
1647
   */
1648 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded to the native filter_input() when the
    // caller really passed a fourth argument
    $value = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // normalize whatever the native function returned
    return self::filter($value);
  }
1658
1659
  /**
1660
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1661
   *
1662
   * Gets external variables and optionally filters them
1663
   *
1664
   * @link  http://php.net/manual/en/function.filter-input-array.php
1665
   *
1666
   * @param int   $type       <p>
1667
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1668
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1669
   *                          <b>INPUT_ENV</b>.
1670
   *                          </p>
1671
   * @param mixed $definition [optional] <p>
1672
   *                          An array defining the arguments. A valid key is a string
1673
   *                          containing a variable name and a valid value is either a filter type, or an array
1674
   *                          optionally specifying the filter, flags and options. If the value is an
1675
   *                          array, valid keys are filter which specifies the
1676
   *                          filter type,
1677
   *                          flags which specifies any flags that apply to the
1678
   *                          filter, and options which specifies any options that
1679
   *                          apply to the filter. See the example below for a better understanding.
1680
   *                          </p>
1681
   *                          <p>
1682
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1683
   *                          input array are filtered by this filter.
1684
   *                          </p>
1685
   * @param bool  $add_empty  [optional] <p>
1686
   *                          Add missing keys as <b>NULL</b> to the return value.
1687
   *                          </p>
1688
   *
1689
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1690
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1691
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1692
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1693
   * fails.
1694
   * @since 5.2.0
1695
   */
1696 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with only $type given, the native filter_input_array() is called
    // without the optional arguments
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // normalize whatever the native function returned
    return self::filter($values);
  }
1706
1707
  /**
1708
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1709
   *
1710
   * Filters a variable with a specified filter
1711
   *
1712
   * @link  http://php.net/manual/en/function.filter-var.php
1713
   *
1714
   * @param mixed $variable <p>
1715
   *                        Value to filter.
1716
   *                        </p>
1717
   * @param int   $filter   [optional] <p>
1718
   *                        The ID of the filter to apply. The
1719
   *                        manual page lists the available filters.
1720
   *                        </p>
1721
   * @param mixed $options  [optional] <p>
1722
   *                        Associative array of options or bitwise disjunction of flags. If filter
1723
   *                        accepts options, flags can be provided in "flags" field of array. For
1724
   *                        the "callback" filter, callable type should be passed. The
1725
   *                        callback must accept one argument, the value to be filtered, and return
1726
   *                        the value after filtering/sanitizing it.
1727
   *                        </p>
1728
   *                        <p>
1729
   *                        <code>
1730
   *                        // for filters that accept options, use this format
1731
   *                        $options = array(
1732
   *                        'options' => array(
1733
   *                        'default' => 3, // value to return if the filter fails
1734
   *                        // other options here
1735
   *                        'min_range' => 0
1736
   *                        ),
1737
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1738
   *                        );
1739
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1740
   *                        // for filter that only accept flags, you can pass them directly
1741
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1742
   *                        // for filter that only accept flags, you can also pass as an array
1743
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1744
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1745
   *                        // callback validate filter
1746 1
   *                        function foo($value)
1747
   *                        {
1748 1
   *                        // Expected format: Surname, GivenNames
1749 1
   *                        if (strpos($value, ", ") === false) return false;
1750 1
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1751 1
   *                        $empty = (empty($surname) || empty($givennames));
1752
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1753
   *                        if ($empty || $notstrings) {
1754 1
   *                        return false;
1755
   *                        } else {
1756
   *                        return $value;
1757
   *                        }
1758
   *                        }
1759
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1760
   *                        </code>
1761
   *                        </p>
1762
   *
1763
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1764
   * @since 5.2.0
1765
   */
1766 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward "$options" to the native filter_var() only when the caller
    // actually supplied a third argument; otherwise use the two-argument call.
    $filtered = func_num_args() >= 3
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // post-process the native result with self::filter()
    return self::filter($filtered);
  }
1776
1777
  /**
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets multiple variables and optionally filters them. The result of the native
   * filter_var_array() call is passed through self::filter() before it is returned.
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>
   *                          An array with string keys containing the data to filter.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a
   *                          filter type, or an array optionally specifying the filter,
   *                          flags and options. If the value is an array, valid keys are
   *                          "filter" which specifies the filter type, "flags" which
   *                          specifies any flags that apply to the filter, and "options"
   *                          which specifies any options that apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can also be an integer holding a filter constant.
   *                          Then all values in the input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   *               on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   *               the variable is not set.
   * @since 5.2.0
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // Forward the optional arguments only when the caller supplied at least
    // "$definition"; otherwise use the one-argument call of the native function.
    $filtered = func_num_args() >= 2
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    return self::filter($filtered);
  }
1821
1822
  /**
   * Check that a string does not contain more characters than the given limit.
   *
   * The length is measured with self::strlen(), not with the byte-based native strlen().
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The maximum number of chars the string may have.
   *
   * @return bool true if the string length is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1834 2
1835
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Use this function if you received an UTF-8 string that was converted from Windows-1252
   * as if it were ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * The fix is a plain search-and-replace using the keys (broken sequences) and
   * values (replacements) of self::$BROKEN_UTF8_FIX.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with all known broken sequences replaced; '' for empty input.</p>
   */
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Search/replace lists are derived from the fix-table on the first call
    // and kept in static locals for all later calls.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1867 1
1868
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * A string is run through self::utf8_decode() followed by self::to_utf8() again and
   * again until one pass no longer changes it. Arrays are processed element by element
   * (recursively), keeping their keys.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str) !== true) {

      // repeat the decode/encode round trip until the value is stable
      $previous = '';
      while ($previous !== $str) {
        $previous = $str;
        $str = self::to_utf8(self::utf8_decode($str));
      }

      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::fix_utf8($value);
    }

    return $str;
  }
1900
1901
  /**
   * Get the text direction of a specific character.
   *
   * When "intlChar" support is available, the result of \IntlChar::charDirection() is
   * mapped to 'LTR' / 'RTL' first. If that gives no decision (or "intlChar" is not
   * available), the code point returned by chr_to_decimal() is looked up in the
   * hard-coded right-to-left tables below. Everything that is not listed is 'LTR'.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // numeric direction values from the "IntlChar"-Class
      $ltrValues = array(0, 11, 12, 20);
      $rtlValues = array(1, 13, 14, 15, 21);

      if (in_array($intlDirection, $ltrValues, true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, $rtlValues, true)) {
        return 'RTL';
      }

      // any other value: fall through to the code point tables
    }

    $c = static::chr_to_decimal($char);

    // all listed right-to-left code points are within 0x5be - 0x10b7f
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // lower block: 0x5be - 0x85e
      $rtlSingles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $rtlRanges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // upper block: 0xfb1d - 0x10b7f
      $rtlSingles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $rtlRanges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two blocks and not 0x200f
      return 'LTR';

    }

    // single code points are compared strictly (===) ...
    if (in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    // ... ranges are compared with <= / >=, both ends inclusive
    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2022
2023
  /**
   * Load data from "/data/<file>.php" (relative to the directory of this class).
   *
   * The data file is pulled in via "require" and whatever that file returns is
   * handed back to the caller.
   *
   * @param string $file <p>The file name without directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2040 5
2041
  /**
   * Check for php-support.
   *
   * Runs self::checkForSupport() first if the support-"array" was not filled yet.
   *
   * @param string|null $key <p>Name of a single support entry, or null for all entries.</p>
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return the stored value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo($key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2066
2067
  /**
   * alias for "UTF8::string_has_bom()"
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool <p>The unchanged result of UTF8::string_has_bom().</p>
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2082
2083
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with the native hexdec() and the resulting number
   * is handed to UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr($hexdec)
  {
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2094
2095
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms are 4 to 6 hexadecimal digits (case-insensitive), optionally
   * prefixed with "\u" or "U+", e.g. "00a7", "\u00a7" or "U+00a7".
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure
   *                   (empty input or not a valid hexadecimal representation).</p>
   */
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hexadecimal digits (0-9, a-f) are allowed here: any other letter
    // would make intval(..., 16) stop early and silently return a wrong number
    // (e.g. 0 for "zzzz") instead of reporting the failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2118
2119
  /**
   * alias for "UTF8::html_entity_decode()"
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string <p>The unchanged result of UTF8::html_entity_decode().</p>
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2134
2135 2
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * Uses mb_encode_numericentity() when that function exists; otherwise every
   * character returned by UTF8::split() is encoded via UTF8::single_chr_html_encode().
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities; '' for empty input.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // start at 0x80 to leave the ASCII range (0x00 - 0x7f) untouched
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // A convmap consists of groups of exactly four integers:
      // (start code, end code, offset, mask). A stray fifth element is
      // ignored by old PHP versions and rejected by newer ones, so only
      // the one complete group is passed.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without mbstring: encode character by character
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2184
2185
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so numeric entities without a semicolon get one appended here
   * before they are decoded. Decoding is repeated until the string no longer changes.
   *
   * Convert all HTML entities to their applicable characters
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>
   *                         The input string.
   *                         </p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
   *                         which document type to use. If null is passed, ENT_QUOTES | ENT_HTML5 is used when
   *                         Bootup::is_php('5.4') is true, otherwise ENT_QUOTES.
   *                         </p>
   *                         <ul>
   *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                         </ul>
   * @param string $encoding [optional] <p>Encoding to use.</p>
   *
   * @return string <p>The decoded string.</p>
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // nothing to decode without "&", or without both "&#" and ";"
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    // make sure the support-"array" is filled before "mbstring" is read from it
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    do {
      $str_compare = $str;

      // decode decimal entities, but keep entities that would become a quote character
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities first)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2329
2330
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * On top of the native htmlentities() this method replaces every backslash with
   * "&#92;" and, for UTF-8, converts the remaining characters of three or more bytes
   * into numbered entities via UTF8::html_encode().
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>
   *                              The input string.
   *                              </p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
   *                              invalid code unit sequences and the used document type. The default is ENT_COMPAT.
   *                              </p>
   *                              <ul>
   *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead of
   *                              returning an empty string. Using this flag is discouraged as it may have
   *                              security implications.</li>
   *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a Unicode
   *                              Replacement Character U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of
   *                              returning an empty string.</li>
   *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given document
   *                              type with a Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
   *                              (otherwise) instead of leaving them as is. This may be useful, for instance,
   *                              to ensure the well-formedness of XML documents with embedded external
   *                              content.</li>
   *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                              </ul>
   * @param string $encoding      [optional] <p>
   *                              Like <b>htmlspecialchars</b>,
   *                              <b>htmlentities</b> takes an optional third argument
   *                              <i>encoding</i> which defines encoding used in
   *                              conversion.
   *                              Although this argument is technically optional, you are highly
   *                              encouraged to specify the correct value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>
   *                The encoded string.
   *                </p>
   *                <p>
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native htmlentities() leaves backslashes alone, so they are
     * replaced by their numbered html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // the extra numbered-entity pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // collect every distinct character of three or more bytes, together with its numbered entity
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($encoded) as $position => $bytes) {
      if ($bytes >= 3) {
        $character = self::access($encoded, $position);

        if (isset($entities[$character])) {
          continue;
        }

        $needles[$character] = $character;
        $entities[$character] = self::html_encode($character);
      }
    }

    return str_replace($needles, $entities, $encoded);
  }
2471
2472
  /**
2473
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2474
   *
2475
   * INFO: Take a look at "UTF8::htmlentities()"
2476
   *
2477
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2478
   *
2479
   * @param string $str           <p>
2480
   *                              The string being converted.
2481
   *                              </p>
2482
   * @param int    $flags         [optional] <p>
2483
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2484
   *                              invalid code unit sequences and the used document type. The default is
2485
   *                              ENT_COMPAT | ENT_HTML401.
2486
   *                              <table>
2487
   *                              Available <i>flags</i> constants
2488
   *                              <tr valign="top">
2489
   *                              <td>Constant Name</td>
2490
   *                              <td>Description</td>
2491
   *                              </tr>
2492
   *                              <tr valign="top">
2493
   *                              <td><b>ENT_COMPAT</b></td>
2494
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2495
   *                              </tr>
2496
   *                              <tr valign="top">
2497
   *                              <td><b>ENT_QUOTES</b></td>
2498
   *                              <td>Will convert both double and single quotes.</td>
2499
   *                              </tr>
2500
   *                              <tr valign="top">
2501
   *                              <td><b>ENT_NOQUOTES</b></td>
2502
   *                              <td>Will leave both double and single quotes unconverted.</td>
2503
   *                              </tr>
2504
   *                              <tr valign="top">
2505
   *                              <td><b>ENT_IGNORE</b></td>
2506
   *                              <td>
2507
   *                              Silently discard invalid code unit sequences instead of returning
2508
   *                              an empty string. Using this flag is discouraged as it
2509
   *                              may have security implications.
2510
   *                              </td>
2511
   *                              </tr>
2512
   *                              <tr valign="top">
2513
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2514
   *                              <td>
2515
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2516
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2517
   *                              </td>
2518
   *                              </tr>
2519
   *                              <tr valign="top">
2520
   *                              <td><b>ENT_DISALLOWED</b></td>
2521
   *                              <td>
2522
   *                              Replace invalid code points for the given document type with a
2523
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2524
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2525
   *                              instance, to ensure the well-formedness of XML documents with
2526
   *                              embedded external content.
2527
   *                              </td>
2528
   *                              </tr>
2529
   *                              <tr valign="top">
2530
   *                              <td><b>ENT_HTML401</b></td>
2531
   *                              <td>
2532
   *                              Handle code as HTML 4.01.
2533
   *                              </td>
2534
   *                              </tr>
2535
   *                              <tr valign="top">
2536
   *                              <td><b>ENT_XML1</b></td>
2537
   *                              <td>
2538
   *                              Handle code as XML 1.
2539
   *                              </td>
2540
   *                              </tr>
2541
   *                              <tr valign="top">
2542
   *                              <td><b>ENT_XHTML</b></td>
2543
   *                              <td>
2544
   *                              Handle code as XHTML.
2545
   *                              </td>
2546
   *                              </tr>
2547
   *                              <tr valign="top">
2548
   *                              <td><b>ENT_HTML5</b></td>
2549
   *                              <td>
2550
   *                              Handle code as HTML 5.
2551 1
   *                              </td>
2552
   *                              </tr>
2553 1
   *                              </table>
2554 1
   *                              </p>
2555 1
   * @param string $encoding      [optional] <p>
2556
   *                              Defines encoding used in conversion.
2557 1
   *                              </p>
2558
   *                              <p>
2559
   *                              For the purposes of this function, the encodings
2560
   *                              ISO-8859-1, ISO-8859-15,
2561
   *                              UTF-8, cp866,
2562
   *                              cp1251, cp1252, and
2563
   *                              KOI8-R are effectively equivalent, provided the
2564
   *                              <i>string</i> itself is valid for the encoding, as
2565 1
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2566
   *                              the same positions in all of these encodings.
2567 1
   *                              </p>
2568
   * @param bool   $double_encode [optional] <p>
2569
   *                              When <i>double_encode</i> is turned off PHP will not
2570
   *                              encode existing html entities, the default is to convert everything.
2571 1
   *                              </p>
2572
   *
2573 1
   * @return string The converted string.
2574 1
   * </p>
2575 1
   * <p>
2576 1
   * If the input <i>string</i> contains an invalid code unit
2577
   * sequence within the given <i>encoding</i> an empty string
2578 1
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2579
   * <b>ENT_SUBSTITUTE</b> flags are set.
2580
   */
2581
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // "UTF-8" is used as given; every other charset name is first passed
    // through normalize_encoding() with "UTF-8" as its second argument.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // the actual escaping is delegated to the native function
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2589
2590 2
  /**
2591
   * Checks whether iconv is available on the server.
2592 2
   *
2593
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2594
   */
2595
  public static function iconv_loaded()
  {
    $return = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    if (
        Bootup::is_php('5.6') === false
        &&
        // without this guard a missing iconv extension ends in a fatal
        // "call to undefined function" instead of a plain "false" result
        function_exists('iconv_set_encoding') === true
    ) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2610 3
2611
  /**
2612 3
   * alias for "UTF8::decimal_to_chr()"
2613
   *
2614
   * @see UTF8::decimal_to_chr()
2615 1
   *
2616
   * @param mixed $int
2617
   *
2618
   * @return string
2619
   */
2620
  public static function int_to_chr($int)
  {
    // pure alias: the argument is forwarded unchanged to decimal_to_chr()
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2624
2625
  /**
2626 1
   * Converts Integer to hexadecimal U+xxxx code point representation.
2627 1
   *
2628
   * INFO: opposite to UTF8::hex_to_int()
2629 1
   *
2630
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2631
   * @param string $pfix [optional]
2632
   *
2633
   * @return string <p>The code point, or empty string on failure.</p>
2634
   */
2635
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // anything that is not a genuine integer is reported as a failure
    if ((int)$int !== $int) {
      return '';
    }

    // left-pad with zeros so that at least four hex digits follow the prefix
    $digits = str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2647
2648
  /**
2649
   * Checks whether intl-char is available on the server.
2650
   *
2651
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2652
   */
2653
  public static function intlChar_loaded()
  {
    // the version check comes first, so class_exists() is only
    // consulted when Bootup::is_php('7.0') reports true
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2661
2662
  /**
2663
   * Checks whether intl is available on the server.
2664
   *
2665
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2666
   */
2667
  public static function intl_loaded()
  {
    // extension_loaded() already answers with a boolean
    $isLoaded = (bool)extension_loaded('intl');

    return $isLoaded;
  }
2671
2672
  /**
2673
   * alias for "UTF8::is_ascii()"
2674
   *
2675
   * @see UTF8::is_ascii()
2676
   *
2677
   * @param string $str
2678
   *
2679
   * @return boolean
2680
   *
2681
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2682
   */
2683
  public static function isAscii($str)
  {
    // deprecated camelCase alias: forwards unchanged to is_ascii()
    $result = self::is_ascii($str);

    return $result;
  }
2687
2688
  /**
2689
   * alias for "UTF8::is_base64()"
2690
   *
2691
   * @see UTF8::is_base64()
2692
   *
2693
   * @param string $str
2694
   *
2695
   * @return bool
2696
   *
2697
   * @deprecated <p>use "UTF8::is_base64()"</p>
2698
   */
2699
  public static function isBase64($str)
  {
    // deprecated camelCase alias: forwards unchanged to is_base64()
    $result = self::is_base64($str);

    return $result;
  }
2703
2704
  /**
2705
   * alias for "UTF8::is_binary()"
2706
   *
2707
   * @see UTF8::is_binary()
2708
   *
2709
   * @param string $str
2710
   *
2711
   * @return bool
2712
   *
2713
   * @deprecated <p>use "UTF8::is_binary()"</p>
2714
   */
2715
  public static function isBinary($str)
  {
    // deprecated camelCase alias: forwards unchanged to is_binary()
    $result = self::is_binary($str);

    return $result;
  }
2719
2720
  /**
2721
   * alias for "UTF8::is_bom()"
2722
   *
2723
   * @see UTF8::is_bom()
2724
   *
2725
   * @param string $utf8_chr
2726
   *
2727
   * @return boolean
2728
   *
2729
   * @deprecated <p>use "UTF8::is_bom()"</p>
2730
   */
2731
  public static function isBom($utf8_chr)
  {
    // deprecated camelCase alias: forwards unchanged to is_bom()
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2735
2736
  /**
2737
   * alias for "UTF8::is_html()"
2738
   *
2739
   * @see UTF8::is_html()
2740
   *
2741
   * @param string $str
2742
   *
2743
   * @return boolean
2744
   *
2745
   * @deprecated <p>use "UTF8::is_html()"</p>
2746
   */
2747
  public static function isHtml($str)
  {
    // deprecated camelCase alias: forwards unchanged to is_html()
    $result = self::is_html($str);

    return $result;
  }
2751
2752
  /**
2753
   * alias for "UTF8::is_json()"
2754
   *
2755
   * @see UTF8::is_json()
2756
   *
2757
   * @param string $str
2758
   *
2759
   * @return bool
2760
   *
2761
   * @deprecated <p>use "UTF8::is_json()"</p>
2762
   */
2763
  public static function isJson($str)
  {
    // deprecated camelCase alias: forwards unchanged to is_json()
    $result = self::is_json($str);

    return $result;
  }
2767
2768
  /**
2769
   * alias for "UTF8::is_utf16()"
2770
   *
2771
   * @see UTF8::is_utf16()
2772
   *
2773
   * @param string $str
2774
   *
2775
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2776
   *
2777
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2778
   */
2779
  public static function isUtf16($str)
  {
    // deprecated camelCase alias: forwards unchanged to is_utf16()
    $result = self::is_utf16($str);

    return $result;
  }
2783
2784
  /**
2785
   * alias for "UTF8::is_utf32()"
2786
   *
2787
   * @see UTF8::is_utf32()
2788
   *
2789
   * @param string $str
2790
   *
2791
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2792
   *
2793
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2794
   */
2795
  public static function isUtf32($str)
  {
    // deprecated camelCase alias: forwards unchanged to is_utf32()
    $result = self::is_utf32($str);

    return $result;
  }
2799 53
2800
  /**
2801 53
   * alias for "UTF8::is_utf8()"
2802 6
   *
2803
   * @see UTF8::is_utf8()
2804
   *
2805 52
   * @param string $str
2806
   * @param bool   $strict
2807
   *
2808
   * @return bool
2809
   *
2810
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2811
   */
2812
  public static function isUtf8($str, $strict = false)
  {
    // deprecated camelCase alias: both arguments are forwarded unchanged to is_utf8()
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2816
2817 1
  /**
2818
   * Checks if a string is 7 bit ASCII.
2819 1
   *
2820 1
   * @param string $str <p>The string to check.</p>
2821
   *
2822
   * @return bool <p>
2823 1
   *              <strong>true</strong> if it is ASCII<br>
2824 1
   *              <strong>false</strong> otherwise
2825 1
   *              </p>
2826
   */
2827
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // an empty string cannot contain a disallowed byte
    if ($str === '') {
      return true;
    }

    // Search for the first byte outside of the whitelist
    // (tab, LF, CR, 0x10, 0x13 and the printable range 0x20-0x7E).
    // NOTE(review): "\x10" / "\x13" look like decimal 10 / 13 written as hex
    // escapes - confirm the intent before touching the pattern.
    $foundForeignByte = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$foundForeignByte;
  }
2837
2838 16
  /**
2839
   * Returns true if the string is base64 encoded, false otherwise.
2840 16
   *
2841
   * @param string $str <p>The input string.</p>
2842 16
   *
2843 4
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2844
   */
2845
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    // strict mode: base64_decode() answers with false as soon as the input
    // contains a character from outside the base64 alphabet
    $decoded = base64_decode($str, true);

    // Compare explicitly against false / '' instead of relying on truthiness:
    // the decoded payload "0" is falsy in PHP, which made valid input such as
    // "MA==" be reported as "not base64".
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // only accept the canonical form, i.e. re-encoding must reproduce the input
    return base64_encode($decoded) === $str;
  }
2860
2861
  /**
2862
   * Check if the input is binary... (this looks like a hack).
2863
   *
2864
   * @param mixed $input
2865
   *
2866
   * @return bool
2867
   */
2868
  public static function is_binary($input)
  {
    $input = (string)$input;

    if (!isset($input[0])) {
      return false;
    }

    // a string made up of "0" and "1" only is treated as binary notation
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary.
    // The former "more than 30% NUL bytes" test is gone: "\x0" and "\x00" are
    // the same byte, so that branch could never decide anything that this
    // check would not decide as well.
    return strpos($input, "\x00") !== false;
  }
2891 1
2892
  /**
2893 1
   * Check if the file is binary.
2894 1
   *
2895 1
   * @param string $file
2896
   *
2897 1
   * @return boolean
2898
   */
2899 1
  public static function is_binary_file($file)
  {
    // fallback: an unreadable file is checked as an empty string
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports a failure via "false" (plus a warning), it does not
      // throw - so the handle must be checked before it is used
      if ($fp !== false) {
        // only the first 512 bytes are inspected
        $chunk = fread($fp, 512);
        fclose($fp);

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // still needed for setups where an error handler turns warnings into exceptions
      $block = '';
    }

    return self::is_binary($block);
  }
2911 1
2912
  /**
2913 1
   * Checks if the given string is equal to any "Byte Order Mark".
2914 1
   *
2915
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2916
   *
2917
   * @param string $str <p>The input string.</p>
2918 1
   *
2919
   * @return bool <p><strong>true</strong> if the $str is Byte Order Mark, <strong>false</strong> otherwise.</p>
2920 1
   */
2921
  public static function is_bom($str)
  {
    // The candidates are the keys of self::$BOM; the strict flag keeps the
    // type-safe "===" comparison, so only an exact match counts.
    $knownBoms = array_keys(self::$BOM);

    return in_array($str, $knownBoms, true);
  }
2931
2932
  /**
2933
   * Check if the string contains any html-tags <lall>.
2934
   *
2935
   * @param string $str <p>The input string.</p>
2936 1
   *
2937
   * @return boolean
2938 1
   */
2939
  public static function is_html($str)
  {
    $str = (string)$str;

    // nothing to find in an empty string
    if ($str === '') {
      return false;
    }

    // look for one opening, closing or self-closing tag (attributes are optional)
    $found = array();
    preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    // a single hit is enough
    return count($found) > 0;
  }
2958 1
2959
  /**
2960
   * Try to check if "$str" is a JSON string.
2961
   *
2962
   * @param string $str <p>The input string.</p>
2963
   *
2964
   * @return bool
2965
   */
2966
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // only decoded objects and arrays are accepted, every other result is rejected
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // ... and the decoder must not have reported an error
    return json_last_error() === JSON_ERROR_NONE;
  }
2990 5
2991 5
  /**
2992
   * Check if the string is UTF-16.
2993 5
   *
2994 5
   * @param string $str <p>The input string.</p>
2995 5
   *
2996 5
   * @return int|false <p>
2997 5
   *                   <strong>false</strong> if it's not UTF-16,<br>
2998 5
   *                   <strong>1</strong> for UTF-16LE,<br>
2999 5
   *                   <strong>2</strong> for UTF-16BE.
3000 5
   *                   </p>
3001 4
   */
3002 3 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // only input that is_binary() accepts is examined any further
    if (self::is_binary($str) !== true) {
      return false;
    }

    // one score per byte order; little endian is evaluated first, then big endian
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array_keys($scores) as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // round trip: UTF-8 -> UTF-16 -> UTF-8 has to reproduce the first conversion
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of count_chars($str), while the
      // needle is a *key* of count_chars($roundTrip) - confirm that this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // a tie (including "no hit at all") gives no verdict
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3050 2
3051 2
  /**
3052
   * Check if the string is UTF-32.
3053 3
   *
3054 3
   * @param string $str
3055 3
   *
3056 2
   * @return int|false <p>
3057 2
   *                   <strong>false</strong> if it's not UTF-32,<br>
3058 2
   *                   <strong>1</strong> for UTF-32LE,<br>
3059 2
   *                   <strong>2</strong> for UTF-32BE.
3060 2
   *                   </p>
3061 2
   */
3062 1 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // only input that is_binary() accepts is examined any further
    if (self::is_binary($str) !== true) {
      return false;
    }

    // one score per byte order; little endian is evaluated first, then big endian
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array_keys($scores) as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // round trip: UTF-8 -> UTF-32 -> UTF-8 has to reproduce the first conversion
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): in_array() searches the *values* of count_chars($str), while the
      // needle is a *key* of count_chars($roundTrip) - confirm that this is intended.
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // a tie (including "no hit at all") gives no verdict
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3110
3111
  /**
3112
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3113
   *
3114
   * @see    http://hsivonen.iki.fi/php-utf8/
3115
   *
3116
   * @param string $str    <p>The string to be checked.</p>
3117
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3118 58
   *
3119
   * @return bool
3120 58
   */
3121 58
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // the empty string is accepted as valid
    if (!isset($str[0])) {
      return true;
    }

    // strict mode: reject everything that is detected as UTF-16 or UTF-32
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut is taken when pcre_utf8_support() does NOT report
    // true, although it relies on the "/u" modifier - confirm the condition is not inverted.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // the length is needed in bytes, hence the "8BIT" variant when strlen() is overloaded
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation: merge its six payload bits into the code point.
          $shift = ($mState - 1) * 6;
          $mUcs4 |= ($in & 0x0000003F) << $shift;

          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }

            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence (e.g. a lone "\xC3"): such a truncated sequence is not valid UTF-8.
    return $mState === 0;
  }
3262
3263
  /**
3264
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3265
   * Decodes a JSON string
3266
   *
3267
   * @link http://php.net/manual/en/function.json-decode.php
3268
   *
3269 2
   * @param string $json    <p>
3270
   *                        The <i>json</i> string being decoded.
3271 2
   *                        </p>
3272
   *                        <p>
3273 2
   *                        This function only works with UTF-8 encoded strings.
3274
   *                        </p>
3275
   *                        <p>PHP implements a superset of
3276 2
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3277
   *                        only supports these values when they are nested inside an array or an object.
3278
   *                        </p>
3279 2
   * @param bool   $assoc   [optional] <p>
3280
   *                        When <b>TRUE</b>, returned objects will be converted into
3281
   *                        associative arrays.
3282
   *                        </p>
3283
   * @param int    $depth   [optional] <p>
3284
   *                        User specified recursion depth.
3285
   *                        </p>
3286
   * @param int    $options [optional] <p>
3287
   *                        Bitmask of JSON decode options. Currently only
3288
   *                        <b>JSON_BIGINT_AS_STRING</b>
3289
   *                        is supported (default is to cast large integers as floats)
3290
   *                        </p>
3291
   *
3292
   * @return mixed the value encoded in <i>json</i> in appropriate
3293
   * PHP type. Values true, false and
3294
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3295
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3296
   * <i>json</i> cannot be decoded or if the encoded
3297
   * data is deeper than the recursion limit.
3298
   */
3299 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // the input is passed through filter() and cast to a string before decoding
    $json = (string)self::filter($json);

    // "$options" is only handed to the native function when Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
3311
3312
  /**
3313
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3314
   * Returns the JSON representation of a value.
3315
   *
3316
   * @link http://php.net/manual/en/function.json-encode.php
3317
   *
3318 2
   * @param mixed $value   <p>
3319
   *                       The <i>value</i> being encoded. Can be any type except
3320 2
   *                       a resource.
3321
   *                       </p>
3322 2
   *                       <p>
3323
   *                       All string data must be UTF-8 encoded.
3324
   *                       </p>
3325 2
   *                       <p>PHP implements a superset of
3326
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3327
   *                       only supports these values when they are nested inside an array or an object.
3328 2
   *                       </p>
3329
   * @param int   $options [optional] <p>
3330
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3331
   *                       <b>JSON_HEX_TAG</b>,
3332
   *                       <b>JSON_HEX_AMP</b>,
3333
   *                       <b>JSON_HEX_APOS</b>,
3334
   *                       <b>JSON_NUMERIC_CHECK</b>,
3335
   *                       <b>JSON_PRETTY_PRINT</b>,
3336
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3337
   *                       <b>JSON_FORCE_OBJECT</b>,
3338
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3339
   *                       constants is described on
3340 7
   *                       the JSON constants page.
3341
   *                       </p>
3342 7
   * @param int   $depth   [optional] <p>
3343 7
   *                       Set the maximum depth. Must be greater than zero.
3344
   *                       </p>
3345
   *
3346
   * @return string|false a JSON encoded string on success or <b>FALSE</b> on failure.
3347 7
   */
3348 7 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // the value is passed through filter() before it is encoded
    $value = self::filter($value);

    // "$depth" is only handed to the native function when Bootup::is_php('5.5') reports true
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($value, $options);
    }

    return json_encode($value, $options, $depth);
  }
3360
3361
  /**
   * Lowercase only the first character of a string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The string with its first character lowercased.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Everything after the first character is kept as-is; UTF8::substr() may answer with false.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($head, $encoding, $cleanUtf8);

    return $head . ($tail === false ? '' : $tail);
  }
3385 1
3386 1
  /**
   * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3401 1
3402
  /**
   * Lowercase the first character of every word in the string.
   *
   * The string is cut into chunks by UTF8::str_to_words(), every chunk that is not
   * listed in $exceptions is passed through UTF8::lcfirst() and the chunks are glued
   * back together without a separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Same emptiness check as the other helpers of this class: "0" is a valid input.
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Only really empty chunks are skipped; a loose "!$word" test
      // would also swallow the word "0" and remove it from the result.
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3451
3452
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars <p>Optional characters to be stripped. When omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (other / control) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "No char list given" is checked explicitly: a loose "!$chars" test
    // would treat the perfectly valid char list "0" as missing.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3475
3476
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if no code point was found.</p>
   */
  public static function max($arg)
  {
    // An array of strings is handled like one concatenated string.
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // The native max() must not be called with an empty list
    // (it raises a warning, and a ValueError as of PHP 8).
    if (!is_array($codepoints) || count($codepoints) === 0) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3491 15
3492
  /**
   * Returns the largest value of the list that UTF8::chr_size_list() produces
   * for the given string, i.e. the widest character measured in bytes.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte lengths of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3509 1
3510
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the mbstring internal encoding is switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3525
3526
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string if no code point was found.</p>
   */
  public static function min($arg)
  {
    // An array of strings is handled like one concatenated string.
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // The native min() must not be called with an empty list
    // (it raises a warning, and a ValueError as of PHP 8).
    if (!is_array($codepoints) || count($codepoints) === 0) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3541 77
3542 3
  /**
   * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3558 5
3559 5
  /**
   * Normalize the encoding-"name" input.
   *
   * The name is upper-cased and, after removing everything except ASCII letters, digits
   * and whitespace, looked up in a table of known aliases. Names without an alias are
   * returned in upper case. Results are memoized per input name.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc. The unchanged $fallback is returned for an empty
   *               $encoding.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    if (!$encoding) {
      return $fallback;
    }

    // Fast paths: the canonical UTF-8 name and names listed in self::$ICONV_ENCODING are returned untouched.
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    $upperName = strtoupper($encoding);

    // lookup key: e.g. "ISO-8859-1" => "ISO88591"
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    $equivalences = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $normalized = !empty($equivalences[$lookupKey]) ? $equivalences[$lookupKey] : $upperName;

    // The cache is keyed by the name exactly as it was passed in.
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3616
3617
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // The search / replace lists are derived from the table once per request.
    static $searchCache = null;
    static $replaceCache = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($searchCache === null) {
      $searchCache = array_keys(self::$UTF8_MSWORD);
      $replaceCache = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($searchCache, $replaceCache, $str);
  }
3642
3643 2
  /**
   * Normalize the whitespace.
   *
   * Every character listed in self::$WHITESPACE_TABLE is replaced by a plain space. Unless
   * $keepBidiUnicodeControls is set, the characters of self::$BIDI_UNI_CODE_CONTROLS_TABLE
   * are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache slot is derived from the very same strict check that decides whether the
    // "NO-BREAK SPACE" entry is dropped. Keying by "(int)$keepNonBreakingSpace" while testing
    // "=== true" would let a truthy non-bool argument store the full table in slot 1.
    $keepNbsp = $keepNonBreakingSpace === true;
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3688
3689
  /**
   * Remove every run of characters matching the POSIX class [[:space:]]
   * (evaluated in PCRE UTF-8 mode) from the string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace($str)
  {
    $str = (string)$str;

    return $str === '' ? '' : (string)preg_replace('/[[:space:]]+/u', '', $str);
  }
3708
3709
  /**
   * Format a number with grouped thousands.
   *
   * Separators that are longer than one byte are supported: in that case the number is
   * formatted with the ASCII separators "." and "," which are swapped afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() swaps both placeholders in a single pass. Two consecutive replacements
      // would re-replace a "," that is part of the just inserted decimal point.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3747
3748 23
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // results are memoized per "input . encoding"
    static $CHAR_CACHE = array();

    $encoding = (string)$encoding;

    // the cache key is built from the input as it was passed in
    $chrOrig = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // still not UTF-8 after normalizing the name: convert the character itself
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $chrOrig . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer "IntlChar" when it is flagged as available; a falsy answer falls through to the manual decoding.
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // Manual decoding of (at most) the first four bytes; unpack() yields a 1-based list.
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or no byte at all)
      $code = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $code;
  }
3814
3815 58
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()" this method never places variables in the current scope;
   *          the result is only written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // success needs both: no "false" from mbstring and a non-empty result array
    return $parsed !== false && !empty($result);
  }
3843
3844 1
  /**
   * Checks if the "/u" pattern modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // An empty pattern with the "u" modifier is matched against an empty subject;
    // errors are silenced, so a failing call simply ends up as "false".
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3854
3855
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: decimal digit strings are used as-is,
   * hexadecimal digit strings go through UTF8::hex_to_int() and everything else through UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // start and end are resolved by the same rules, in this order
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {

      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($bounds[0], $bounds[1]));
  }
3901 2
3902
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are turned into hexadecimal html entities, which the loop below decodes.
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', rawurldecode($str));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // One pass = to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8.
    // With $multi_decode the pass is repeated until the string stops changing.
    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $str);

    return (string)$str;
  }
3952 40
3953 5
  /**
   * Alias of "UTF8::remove_bom()": the argument is forwarded unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3968
3969
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM byte string => byte length) is tested against
   * the start of the string and cut off when it matches.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {

      // byte-wise comparison: only a BOM at offset 0 counts
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      $rest = self::substr($str, $bomByteLength, null, '8BIT');
      $str = $rest === false ? '' : (string)$rest;
    }

    return $str;
  }
3996
3997
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a search string is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        $item = (string)$item;

        // nothing to collapse for an empty search string
        if ($item === '') {
          continue;
        }

        // The replacement is produced by a callback, so the search string is inserted literally.
        // As a plain preg_replace() replacement, "$1", "\\1" etc. inside of it would be
        // interpreted as back-references (e.g. "$2" would delete the whole run).
        $str = preg_replace_callback(
            '/(' . preg_quote($item, '/') . ')+/',
            function () use ($item) {
              return $item;
            },
            $str
        );
      }
    }

    return $str;
  }
4020
4021
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * The replacement is repeated until nothing matches anymore, so sequences that only appear
   * after a removal are caught as well. NOTE: a $replacement that itself contains one of the
   * removed sequences would therefore never terminate.
   *
   * @param string $str
   * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00" - "%1F", "%7F") of these characters.</p>
   * @param string $replacement
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits of url-encoded bytes are case-insensitive ("%0b" === "%0B"), hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4054 57
4055
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // mb_substitute_character() only takes a code point or a mode keyword ("none", "long", "entity"),
      // never a literal replacement string. Invalid sequences are therefore first turned into
      // U+FFFD, which the str_replace() below swaps for the requested replacement.
      // NOTE(review): assumes the mbstring internal encoding can represent U+FFFD (e.g. UTF-8) - confirm.
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);
    }

    return str_replace(
        array(
            "\xEF\xBF\xBD",
            '�',
        ),
        array(
            $replacementChar,
            $replacementChar,
        ),
        $str
    );
  }
4100
4101 60
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars <p>Optional characters to be stripped. When omitted (or empty), characters of the
   *                      Unicode categories "Z" (separators) and "C" (other / control) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "No char list given" is checked explicitly: a loose "!$chars" test
    // would treat the perfectly valid char list "0" as missing.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
4124
4125 19
  /**
   * Build a regex fragment that matches any of the given characters.
   *
   * The chunks returned by UTF8::str_split() are collected in a bracket expression; chunks that are
   * longer than two bytes and not a single character become separate alternatives of a non-capturing group.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>The regex fragment, without delimiters.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // $parts[0] collects the bracket expression, every further entry is a separate alternative
    $parts = array($class);

    foreach (self::str_split($s) as $chunk) {

      if ('-' === $chunk) {
        // a hyphen goes to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($chunk[2])) {
        // at most two bytes: escaped for the "/" delimiter
        $parts[0] .= preg_quote($chunk, '/');
        continue;
      }

      if (1 === self::strlen($chunk)) {
        // one character of three or more bytes: appended as-is
        $parts[0] .= $chunk;
        continue;
      }

      $parts[] = $chunk;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = 1 === count($parts) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4173
4174
  /**
   * WARNING: Prints every value of self::$SUPPORT (native UTF-8 support / libs), e.g. for debugging.
   */
  public static function showSupport()
  {
    // make sure the support list is populated before it is printed
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
4187
4188
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched, if requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4219 38
4220 38
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a regex, otherwise a byte-wise
   * fallback walks over the string and collects well-formed 1-4 byte sequences;
   * bytes that do not start / complete a well-formed sequence are skipped.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    $ret = array();

    // one support check is enough for both branches below
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: decode the UTF-8 byte sequences manually

      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $len = \mb_strlen($str, '8BIT');
      } else {
        $len = strlen($str);
      }

      for ($i = 0; $i < $len; $i++) {

        $lead = $str[$i];

        // derive the expected sequence length from the lead byte
        if (($lead & "\x80") === "\x00") {
          $seqLength = 1;
        } elseif (($lead & "\xE0") === "\xC0") {
          $seqLength = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $seqLength = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $seqLength = 4;
        } else {
          // stray continuation byte or invalid lead byte
          continue;
        }

        if ($seqLength === 1) {
          $ret[] = $lead;
          continue;
        }

        // truncated sequence at the end of the string
        if (!isset($str[$i + $seqLength - 1])) {
          continue;
        }

        // every following byte must be a continuation byte (10xxxxxx)
        $char = $lead;
        $valid = true;
        for ($j = 1; $j < $seqLength; $j++) {
          if (($str[$i + $j] & "\xC0") !== "\x80") {
            $valid = false;
            break;
          }
          $char .= $str[$i + $j];
        }

        if ($valid === true) {
          $ret[] = $char;
          $i += $seqLength - 1;
        }
      }
    }

    if ($length > 1) {
      // glue $length characters together per element
      $chunks = array();
      foreach (array_chunk($ret, $length) as $chunk) {
        $chunks[] = implode('', $chunk);
      }

      return $chunks;
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4343 2
4344
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed detect-order and finally an "iconv()" round-trip.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // run each detector only once and reuse the result for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // an encoding is accepted when converting the string to itself leaves it unchanged
    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4444
4445
  /**
   * Check if the string ends with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // cut off as many trailing characters as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $tail === $needle;
  }
4473
4474
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // "substr()" can fail with false, which must not be passed on as a string
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4497 26
4498 26
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is quoted and turned into a case-insensitive unicode regex,
   * the replacement itself is done via "preg_replace()".
   *
   * NOTE(review): "preg_replace()" interprets references like "$1" or "\1" inside of
   * $replace, the native "str_ireplace()" does not -> confirm whether callers rely on this.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();

    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ($term === '') {
        // this pattern can never match, so an empty search term replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
4541
4542
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::stripos($haystack, $needle) === 0;
  }
4565 1
4566
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * Strings that are not longer than $length are returned unchanged, otherwise the
   * string is cut at a space (if there is one) and $strAddOn is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max. number of characters before the add-on.</p>
   * @param string $strAddOn <p>The string that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit hits a space -> cut right before it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // drop the last (maybe incomplete) word
    $words = explode(' ', $truncated);
    array_pop($words);
    $head = implode(' ', $words);

    if ($head !== '') {
      return $head . $strAddOn;
    }

    // nothing left before the last word -> fall back to a hard cut
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4606 2
4607 2
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not a positive integer, if it is
   * smaller than the length of the input or if $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // an empty pad-string can't pad anything (and would cause a division by zero below)
      if ($ps_length < 1) {
        return $str;
      }

      $diff = $pad_length - $str_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the smaller half, the right side gets the rest,
          // both lengths are real integers (the cast is applied after the division)
          $leftLength = (int)floor($diff / 2);
          $rightLength = $diff - $leftLength;

          $pre = str_repeat($pad_string, (int)ceil($leftLength / $ps_length));
          $pre = (string)self::substr($pre, 0, $leftLength);
          $post = str_repeat($pad_string, (int)ceil($rightLength / $ps_length));
          $post = (string)self::substr($post, 0, $rightLength);
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4661
4662
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated
   * via the native "str_repeat()".
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4686
4687 12
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function, $count is filled by reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4720
4721 1
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * The subject is returned unchanged if the search-term is not found.
   *
   * @param string $search
   * @param string $replace
   * @param string $subject
   *
   * @return string
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstPos = self::strpos($subject, $search);

    if ($firstPos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
4740 1
4741 1
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via "UTF8::split()", so the characters itself stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4756
4757
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4782 1
4783 1
  /**
   * Split a string into an array.
   *
   * The string is split by the grapheme-cluster regex of this class, $len clusters are
   * glued together per array element. A $len smaller than 1 is handed over to the native
   * "str_split()".
   *
   * @param string $str
   * @param int    $len
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue $len clusters together per element
    $result = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $result[] = implode('', $group);
    }

    return $result;
  }
4827
4828 1
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::strpos($haystack, $needle) === 0;
  }
4851 10
4852 1
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number, so the result has no
   * leading zeros (e.g. "a" => "1100001"); an empty string or only null bytes
   * result in "0".
   *
   * INFO: the bits are built per hex digit instead of via "base_convert()", because
   * "base_convert()" loses precision on large numbers (longer strings).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $value = unpack('H*', $str);

    // expand every (lowercase) hex digit into its 4 bits
    $bits = strtr(
        $value[1],
        array(
            '0' => '0000',
            '1' => '0001',
            '2' => '0010',
            '3' => '0011',
            '4' => '0100',
            '5' => '0101',
            '6' => '0110',
            '7' => '0111',
            '8' => '1000',
            '9' => '1001',
            'a' => '1010',
            'b' => '1011',
            'c' => '1100',
            'd' => '1101',
            'e' => '1110',
            'f' => '1111',
        )
    );

    // same output format as a number conversion: no leading zeros
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4867
4868
  /**
   * Convert a string into an array of words.
   *
   * The string is split with "PREG_SPLIT_DELIM_CAPTURE", so the result contains the
   * words and also the parts between the words (as long as they are not filtered).
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>Remove values with this length or shorter, null disables the filter.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested -> return the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && trim($part) === '';
      if (!$isBlank) {
        $filtered[] = $part;
      }
    }

    return $filtered;
  }
4929
4930 1
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4945 1
4946 1
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The words are taken from "UTF8::str_to_words()", where every second element
   * (odd index) is a word.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    if ($format === 1) {
      $words = array();
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();

      // the first word starts after the leading non-word part
      $offset = self::strlen($parts[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[$offset] = $parts[$idx];
        $offset += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    return ($total - 1) / 2;
  }
4989
4990
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
   *       case-folded via "UTF8::strtocasefold()" before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5008 14
5009
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5026
5027 15
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are identical (as strings) are equal right away, otherwise the
   * NFD normalized versions of both strings are compared via the native "strcmp()".
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
5047
5048 13
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The chars that end the initial segment.</p>
   * @param int    $offset   <p>Start of the examined part of the string.</p>
   * @param int    $length   <p>Length of the examined part of the string.</p>
   *
   * @return int|null <p>
   *                  The length of the initial segment, or null if the char-list is empty, the
   *                  sub-string can't be extracted or the (sub-)string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // reduce the string to the requested part first
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // capture (non-greedy) everything in front of the first char from the char-list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
5084 2
5085 2
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are passed through unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5102 3
5103
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every element is converted via "UTF8::chr()" and the results are joined.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $chrCallback = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    $chars = array_map($chrCallback, $array);

    return implode('', $chars);
  }
5125
5126
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * The known BOMs are the keys of the internal BOM-map.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$BOM) as $bom) {
      // the BOM has to be found right at the beginning
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5143 1
5144
  /**
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
   *
   * INFO: the UTF-8 cleaning is only done if $cleanUtf8 is set to true.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>
   *                                The input string.
   *                                </p>
   * @param string  $allowable_tags [optional] <p>
   *                                You can use the optional second parameter to specify tags which should
   *                                not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
5178 1
5179 1
  /**
   * Find the position of the first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching (cast to int).</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found or if haystack / needle is empty.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the
      // position reported by the underlying search functions
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: a boolean $encoding is only accepted as a fallback for old versions
    $encoding = ($encoding === 'UTF-8' || is_bool($encoding))
        ? 'UTF-8'
        : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings than UTF-8
    $useGrapheme = $encoding === 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5239
5240 1
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string <p>
   *                      A sub-string,<br>or <strong>false</strong> if needle is not found or if
   *                      haystack / needle is empty.<br>If the needle becomes empty through the
   *                      clean-up step, the (cleaned) haystack is returned.
   *                      </p>
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The clean-up can leave an empty needle. Compare against '' explicitly:
    // a loose "!$needle" test would also treat the valid needle "0" as empty
    // and wrongly return the whole haystack.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII input can be handled by the native byte-wise function
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
5321 87
5322
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // these encodings are measured byte-wise
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // prefer mbstring, then iconv; both accept the requested encoding
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5429 2
5430
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are case-folded via UTF8::strtocasefold() and then handed to UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5446
5447 1
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5465 2
5466
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded via UTF8::strtocasefold() and then handed to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5483 1
5484
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut to their first $len characters via UTF8::substr() and then
   * compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // the string cast turns a possible "false" from substr() into an empty string
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5504
5505
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>
   *                      The string starting from the first character found,<br>
   *                      or false if none is found or if haystack / char_list is empty.
   *                      </p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $found = preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
    if (!$found) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    return substr($haystack, strpos($haystack, $m[0]));
  }
5530
5531
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a non-negative
    // integer is converted into its character first. This has to happen before
    // the string cast below, otherwise the check can never be true.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither mbstring nor iconv is available for a non UTF-8
    // encoding (logical "&&" and "iconv === false", same rule as in UTF8::strlen())
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // translate a negative offset into the equivalent absolute start
      // position, so that the returned position refers to the whole haystack
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte position of the needle inside the remaining part of the haystack
    $pos = strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the skipped characters
    return $offset + self::strlen(substr($haystackTmp, 0, $pos));
  }
5676
5677
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use; anything other than "UTF-8"
   *                              is passed through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = $encoding === 'UTF-8'
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would make the
      // underlying search report a wrong position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strrchr($haystack, $needle, $before_needle, $encoding);
  }
5716
5717
  /**
   * Reverses characters order in the string.
   *
   * The string is split into its characters via UTF8::split(), so multi-byte characters stay intact.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5734
5735
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name to use; anything other than "UTF-8"
   *                               is passed through UTF8::normalize_encoding() first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // invalid characters in front of the needle would make the
    // underlying search report a wrong position
    if ($cleanUtf8 === true) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $result = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $result;
  }
5773 1
5774
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is converted via UTF8::chr() before everything is cast to string
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a boolean $encoding is only accepted as a fallback for old versions
    $encoding = ($encoding === 'UTF-8' || $encoding === true || $encoding === false)
        ? 'UTF-8'
        : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings than UTF-8
    $useGrapheme = $encoding === 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides, then search case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5854
5855
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is converted via UTF8::chr() before everything is cast to string
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a boolean $encoding is only accepted as a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strrpos()" can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset !== 0) {
      if ($offset > 0) {
        // skip the first $offset characters
        $slice = self::substr($haystack, $offset);
      } else {
        // drop the last characters and search from the very beginning
        $slice = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      if ($slice !== null) {
        $haystack = $slice === false ? '' : (string)$slice;
      }
    }

    // byte position of the last occurrence inside the (possibly cut) haystack
    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position into a character position and re-add the skipped characters
    return $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
5957
5958
  /**
5959
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5960
   * mask.
5961
   *
5962
   * @param string $str    <p>The input string.</p>
5963
   * @param string $mask   <p>The mask of chars</p>
5964
   * @param int    $offset [optional]
5965
   * @param int    $length [optional]
5966
   *
5967
   * @return int
5968
   */
5969 2
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // Narrow the subject to the requested window first; a failed substr() is treated as an empty window.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    $str = (string)$str;

    // Nothing can match when either the subject or the mask is empty.
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // Grab the longest leading run of mask characters and measure it via self::strlen().
    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
5986 1
5987 1
  /**
5988
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5989 2
   *
5990
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5991
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5992
   * @param bool    $before_needle [optional] <p>
5993
   *                               If <b>TRUE</b>, strstr() returns the part of the
5994
   *                               haystack before the first occurrence of the needle (excluding the needle).
5995 2
   *                               </p>
5996 1
   * @param string  $encoding      [optional] <p>Set the charset.</p>
5997 2
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5998
   *
5999
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
6000
   */
6001 2
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle can never produce a match
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // the multibyte search functions report wrong results
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred path: mbstring handles every encoding
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // from here on only UTF-8 is handled, so tell the caller about anything else
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_strstr()" has no encoding parameter, so it is only used for UTF-8
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first occurrence of the needle
    $match = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    return $before_needle
        ? $match[1]
        : self::substr($haystack, self::strlen($match[1]));
  }
6059 12
6060
  /**
6061 12
   * Unicode transformation for case-less matching.
6062
   *
6063 12
   * @link http://unicode.org/reports/tr21/tr21-5.html
6064
   *
6065 12
   * @param string  $str       <p>The input string.</p>
6066 1
   * @param bool    $full      [optional] <p>
6067 1
   *                           <b>true</b>, replace full case folding chars (default)<br>
6068
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
6069
   *                           </p>
6070 12
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6071 12
   *
6072
   * @return string
6073 12
   */
6074 1
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // caches that survive between calls: search / replace lists of the common table
    // and the full folding table (loaded on first use only)
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // step 1: the small, always applied folding table
    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // step 2: the optional full folding table, index 0 = search list, index 1 = replacement list
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // step 3: lowercase whatever is left
    return self::strtolower($str);
  }
6111 23
6112
  /**
6113
   * Make a string lowercase.
6114
   *
6115
   * @link http://php.net/manual/en/function.mb-strtolower.php
6116
   *
6117
   * @param string      $str       <p>The string being lowercased.</p>
6118
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6119
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6120
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6121
   *
6122
   * @return string str with all alphabetic characters converted to lowercase.
6123
   */
6124 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the string is handed to mbstring / intl
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $canTransliterate = self::$SUPPORT['intl'] === true
                        &&
                        Bootup::is_php('5.4') === true;

    if (!$canTransliterate) {
      // the "lang"-parameter is ignored here: warn and lowercase via mbstring
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    // use the language specific transliterator if intl knows it, else the generic one
    $langCode = $lang . '-Lower';
    if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
6169
6170
  /**
6171 17
   * Generic case sensitive transformation for collation matching.
6172
   *
6173
   * @param string $str <p>The input string</p>
6174 1
   *
6175 1
   * @return string
6176
   */
6177 17
  private static function strtonatfold($str)
  {
    // canonical decomposition (NFD) first ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... then drop every run of non-spacing marks (\p{Mn})
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6182
6183
  /**
6184
   * Make a string uppercase.
6185
   *
6186
   * @link http://php.net/manual/en/function.mb-strtoupper.php
6187
   *
6188
   * @param string      $str       <p>The string being uppercased.</p>
6189
   * @param string      $encoding  [optional] <p>Set the charset.</p>
6190
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6191
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6192
   *
6193
   * @return string str with all alphabetic characters converted to uppercase.
6194
   */
6195 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    // Uppercases $str via mbstring, or via an intl transliterator when a language is requested.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars before the string is handed to mbstring / intl
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // use the language specific transliterator if intl knows it, else the generic one
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the "lang"-parameter is ignored here: warn and fall through to the mbstring conversion below
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6239
6240 1
  /**
6241 1
   * Translate characters or replace sub-strings.
6242 1
   *
6243 1
   * @link  http://php.net/manual/en/function.strtr.php
6244 1
   *
6245
   * @param string          $str  <p>The string being translated.</p>
6246 1
   * @param string|string[] $from <p>The string replacing from.</p>
6247 1
   * @param string|string[] $to   <p>The string being translated to.</p>
6248
   *
6249 1
   * @return string <p>
6250 1
   *                This function returns a copy of str, translating all occurrences of each character in from to the
6251
   *                corresponding character in to.
6252
   *                </p>
6253 1
   */
6254
  public static function strtr($str, $from, $to = INF)
  {
    // Multibyte aware strtr(): supports the ($str, $from, $to) form as well as the ($str, $pairs) form.
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // replacing something by itself is a no-op
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // Strings are split into single characters; arrays are documented inputs too and
      // are used as given, because self::str_split() is only meant for strings.
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // like the native strtr(): surplus entries of the longer list are ignored
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // build the "search => replacement" map for the two-argument strtr() call below
      $from = array_combine($from, $to);
    }

    // only reachable without $to: a plain string in $from is removed from $str
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6287
6288
  /**
6289
   * Return the width of a string.
6290
   *
6291 1
   * @param string  $str       <p>The input string.</p>
6292
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6293 1
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6294
   *
6295
   * @return int
6296
   */
6297
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding,
      // so invalid chars are removed up front
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // "mb_strwidth()" is used directly (per the original note, a polyfill provides it if needed)
    return \mb_strwidth($str, $encoding);
  }
6312
6313 1
  /**
6314 1
   * Changes all keys in an array.
6315
   *
6316 1
   * @param array $array <p>The array to work on</p>
6317
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
6318
   *                  or <strong>CASE_LOWER</strong> (default)</p>
6319
   *
6320
   * @return array|false <p>An array with its keys lower or uppercased, or false if
6321
   *                     input is not an array.</p>
6322
   */
6323
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // only an exact CASE_LOWER lowercases; CASE_UPPER and every unknown value uppercase the keys
    $toLower = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $key => $value) {
      $newKey = $toLower ? self::strtolower($key) : self::strtoupper($key);

      // keys that collide after the conversion keep the value that was seen last
      $result[$newKey] = $value;
    }

    return $result;
  }
6350
6351 1
  /**
6352 1
   * Get part of a string.
6353
   *
6354
   * @link http://php.net/manual/en/function.mb-substr.php
6355 73
   *
6356 1
   * @param string  $str       <p>The string being checked.</p>
6357
   * @param int     $offset    <p>The first position used in str.</p>
6358
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
6359 72
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6360 72
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6361 49
   *
6362 49
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6363
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6364
   *                      characters long, <b>FALSE</b> will be returned.</p>
6365 72
   */
6366 2
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // trivial cases: an empty input or an explicit zero length always give an empty string
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // neither offset nor length: the caller asked for the whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // the character count is only needed to validate the offset or to default the length
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = (int)self::strlen($str, $encoding);
    }

    // an offset behind the end of the string cannot be satisfied
    if ($offset && $offset > $str_length) {
      return false;
    }

    // from here on $length is always an integer
    $length = ($length === null) ? $str_length : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    $isUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 goes through the byte-wise native function when substr() is not overloaded by mbstring
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return substr($str, $offset, $length);
    }

    // preferred path: mbstring handles every encoding
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // none of the remaining fallbacks receives the encoding, so warn about anything but UTF-8
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_substr()" has no encoding parameter, so it is only used for UTF-8
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    // pure ASCII: bytes and characters are the same thing
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php:
    // split into an array of characters, cut out the relevant part and join it to a string again
    $chars = self::split($str);

    return implode('', array_slice($chars, $offset, $length));
  }
6473 1
6474 1
  /**
6475
   * Binary safe comparison of two strings from an offset, up to length characters.
6476
   *
6477 1
   * @param string  $str1               <p>The main string being compared.</p>
6478 1
   * @param string  $str2               <p>The secondary string being compared.</p>
6479
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
6480 1
   *                                    counting from the end of the string.</p>
6481 1
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
6482
   *                                    the length of the str compared to the length of main_str less the offset.</p>
6483
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6484 1
   *                                    insensitive.</p>
6485
   *
6486
   * @return int <p>
6487
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
6488
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
6489
   *             <strong>0</strong> if they are equal.
6490
   *             </p>
6491
   */
6492
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    if ($offset !== 0 || $length !== null) {
      // cut the requested window out of the main string (a failed substr() counts as empty) ...
      $part1 = self::substr($str1, $offset, $length);
      $str1 = ($part1 === false) ? '' : (string)$part1;

      // ... and limit the second string to the length of that window
      $part2 = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($part2 === false) ? '' : (string)$part2;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6518 1
6519 1
  /**
6520
   * Count the number of substring occurrences.
6521 1
   *
6522 1
   * @link  http://php.net/manual/en/function.substr-count.php
6523
   *
6524
   * @param string  $haystack  <p>The string to search in.</p>
6525
   * @param string  $needle    <p>The substring to search for.</p>
6526
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
6527 1
   * @param int     $length    [optional] <p>
6528
   *                           The maximum length after the specified offset to search for the
6529 1
   *                           substring. It outputs a warning if the offset plus the length is
6530 1
   *                           greater than the haystack length.
6531 1
   *                           </p>
6532 1
   * @param string  $encoding  <p>Set the charset.</p>
6533 1
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6534 1
   *
6535 1
   * @return int|false <p>This function returns an integer or false if there isn't a string.</p>
6536
   */
6537
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // "there isn't a string": report that with false instead of 0
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // restrict the haystack to the requested window before counting
    if ($offset || $length !== null) {
      $offset = (int)$offset;
      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;

      $isEmptyRange = $length !== 0
                      &&
                      $offset !== 0
                      &&
                      $length + $offset <= 0;

      // output from "substr_count()" have changed in PHP 7.1
      if ($isEmptyRange && Bootup::is_php('7.1') === false) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove non UTF-8 chars from both strings before they are counted
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred path: mbstring handles every encoding
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // the regex fallback below always works in UTF-8 mode, so warn about anything else
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // fallback: one match set per occurrence of the needle
    $occurrences = array();
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return count($occurrences);
  }
6608
6609
  /**
6610
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
6611
   *
6612
   * @param string $haystack <p>The string to search in.</p>
6613
   * @param string $needle   <p>The substring to search for.</p>
6614
   *
6615
   * @return string <p>Return the sub-string.</p>
6616
   */
6617 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // without a needle or without a (case insensitive) prefix match there is nothing to strip
    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix; a failed substr() means nothing is left
    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6641
6642
  /**
6643
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
6644
   *
6645
   * @param string $haystack <p>The string to search in.</p>
6646
   * @param string $needle   <p>The substring to search for.</p>
6647
   *
6648
   * @return string <p>Return the sub-string.</p>
6649
   */
6650 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // without a needle or without a (case insensitive) suffix match there is nothing to strip
    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix; a failed substr() means nothing is left
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $rest = self::substr($haystack, 0, $keepLength);

    return ($rest === false) ? '' : (string)$rest;
  }
6674
6675
  /**
6676
   * Removes a prefix ($needle) from the start of the string ($haystack).
6677
   *
6678
   * @param string $haystack <p>The string to search in.</p>
6679
   * @param string $needle   <p>The substring to search for.</p>
6680
   *
6681
   * @return string <p>Return the sub-string.</p>
6682
   */
6683 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // without a needle or without a (case sensitive) prefix match there is nothing to strip
    if (!isset($needle[0])) {
      return $haystack;
    }

    if (self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix; a failed substr() means nothing is left
    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6707 1
6708 1
  /**
6709 1
   * Replace text within a portion of a string.
6710
   *
6711
   * source: https://gist.github.com/stemar/8287074
6712
   *
6713 1
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
6714 1
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
6715 1
   * @param int|int[]       $offset           <p>
6716 1
   *                                          If start is positive, the replacing will begin at the start'th offset
6717 1
   *                                          into string.
6718 1
   *                                          <br><br>
6719 1
   *                                          If start is negative, the replacing will begin at the start'th character
6720 1
   *                                          from the end of string.
6721
   *                                          </p>
6722
   * @param int|int[]|void  $length           [optional] <p>If given and is positive, it represents the length of the
6723
   *                                          portion of string which is to be replaced. If it is negative, it
6724 1
   *                                          represents the number of characters from the end of string at which to
6725 1
   *                                          stop replacing. If it is not given, then it will default to strlen(
6726 1
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6727 1
   *                                          length is zero then this function will have the effect of inserting
6728 1
   *                                          replacement into string at the given start offset.</p>
6729 1
   *
6730 1
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6731 1
   */
6732
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // ---- array mode: bring every argument to one entry per string, then recurse per element
    if (is_array($str) === true) {
      $num = count($str);

      // the replacement
      $replacement = (is_array($replacement) === true)
          ? array_slice($replacement, 0, $num)
          : array_pad(array($replacement), $num, $replacement);

      // the offset: entries that are not integers are replaced by 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as $offsetKey => $offsetValue) {
          $offset[$offsetKey] = ((int)$offsetValue === $offsetValue) ? $offsetValue : 0;
        }
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length
      // NOTE(review): a missing length becomes 0 ("insert") here, while the string mode below
      // defaults to the length of the string ("replace up to the end") - confirm this is intended.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as $lengthKey => $lengthValue) {
          if (!isset($lengthValue)) {
            $length[$lengthKey] = 0;
          } else {
            $length[$lengthKey] = ((int)$lengthValue === $lengthValue) ? $lengthValue : $num;
          }
        }
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // ---- string mode
    // only the first entry of a replacement array is used for a single string
    if (is_array($replacement) === true) {
      $replacement = (count($replacement) > 0) ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    // nothing to replace in: the result is just the replacement
    if (!isset($str[0])) {
      return $replacement;
    }

    // pure ASCII input: the native function is used directly
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice on that level
    preg_match_all('/./us', $str, $strChars);
    preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return implode('', $strChars[0]);
  }
6809
6810
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix. The haystack is returned unchanged if the needle is empty
   *                or is not a suffix of it; an empty haystack yields an empty string.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix, measured with the class' own length helper
    $withoutSuffix = self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));

    return $withoutSuffix === false ? '' : (string)$withoutSuffix;
  }
6841
6842 1
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; a character that is already identical to its
   * upper-cased form is lower-cased instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // run the input through the class' cleaner before the unicode-mode regex below sees it
      $str = self::clean($str);
    }

    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $upper = self::strtoupper($match[0], $encoding);

          // unchanged by upper-casing => it already was upper-case (or caseless), so lower-case it
          return $match[0] === $upper ? self::strtolower($match[0], $encoding) : $upper;
        },
        $str
    );
  }
6885
6886
  /**
   * Deprecated alias for "UTF8::to_ascii()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "$unknown" replacement character.</p>
   * @param bool   $strict    <p>Passed on as the "$strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6903
6904
  /**
   * Deprecated alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6919
6920
  /**
   * Deprecated alias for "UTF8::to_latin1()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6935 21
6936
  /**
   * Deprecated alias for "UTF8::to_utf8()"; only the string is forwarded, so the html-entity
   * decoding option of "to_utf8()" keeps its default.
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6951
6952
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned as-is. Otherwise the string is cleaned via "UTF8::clean()" and every
   * remaining non-ASCII character is replaced by the entry found for its code point in the lookup
   * tables loaded through "self::getData()"; characters without an entry become $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // static cache of the lookup tables, keyed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace('ℌ', 'H', $str);

        // the transliterator reports failure with a non-string result; keep the
        // un-transliterated string in that case so the table lookup below still runs
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if (is_string($transliterated)) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }
      }
    }

    // split into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // must be reset per character: otherwise a lead byte that matches no branch below
      // would silently reuse the code point computed for the previous character
      $ord = null;

      // decode the code point from the lead byte and its continuation bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // lead bytes 128-191 and 254/255 match no branch above and cannot be decoded
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the table for this bank once; a missing table is cached as an empty array
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      // debugging hint: dump $bank, $ord and $newchar here to find characters missing from the tables
      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return implode('', $chars);
  }
7112
7113
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively, keys are kept); a string is handed to
   * "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      foreach ($str as &$element) {
        $element = self::to_iso8859($element);
      }
      unset($element);

      return $str;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
7142 22
7143
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7156 3
7157
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>Byte sequences that look like 2-, 3- or 4-byte UTF-8 are copied unchanged.</li>
   * <li>Every other byte >= 0x80 is converted via "to_utf8_convert()", i.e. the original string is assumed
   * to be either WINDOWS-1252 or ISO-8859.</li>
   * <li>Unicode escape sequences ("\uXXXX") are decoded; html-entities only if requested.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str) === true) {
      // convert every element (recursively), keeping the keys
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // length in bytes, even when mbstring overloads "strlen()"
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';

    for ($i = 0; $i < $max; $i++) {
      $byte = $str[$i];

      if ($byte < "\xC0") {
        // a lone continuation byte (10xxxxxx) needs conversion, anything else is copied as it is
        $buf .= (($byte & "\xC0") === "\x80") ? self::to_utf8_convert($byte) : $byte;
        continue;
      }

      // number of continuation bytes announced by the lead byte (0 = not a usable lead byte)
      if ($byte <= "\xDF") {
        $tail = 1;
      } elseif ($byte <= "\xEF") {
        $tail = 2;
      } elseif ($byte <= "\xF7") {
        $tail = 3;
      } else {
        $tail = 0;
      }

      // collect the continuation bytes; a position behind the end of the string counts as "\x00"
      $sequence = $byte;
      $looksLikeUtf8 = $tail > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $tail; $k++) {
        $next = $i + $k >= $max ? "\x00" : $str[$i + $k];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $buf .= $sequence;
        $i += $tail;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are inspected on their own
        $buf .= self::to_utf8_convert($byte);
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7274
7275
  /**
   * Converts one non-UTF-8 byte into its UTF-8 representation.
   *
   * Bytes listed in "self::$WIN1252_TO_UTF8" use that table; every other byte is encoded as the
   * two-byte UTF-8 sequence of the code point with the same number.
   *
   * @param string $int <p>The byte to convert, as a one-character string (it is read with "ord()" and
   *                    string bit-operators, despite the parameter name).</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int)
  {
    $ordC1 = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return '' . self::$WIN1252_TO_UTF8[$ordC1];
    }

    // lead byte: 110000xx (upper two bits of the value), trail byte: 10xxxxxx (lower six bits)
    $leadByte = self::chr_and_parse_int($ordC1 / 64) | "\xC0";
    $trailByte = ($int & "\x3F") | "\x80";

    return $leadByte . $trailByte;
  }
7295
7296
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * Without a character list (INF, or any empty value other than the string "0") all leading and trailing
   * separator (\pZ) and "other" (\pC) characters are removed. With a character list the work is delegated to
   * "UTF8::ltrim()" and "UTF8::rtrim()".
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP but it is a legitimate character list, so it must not select the
    // whitespace default; it is handed on to ltrim() / rtrim() like any other list.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
7324
7325
  /**
   * Makes string's first char uppercase.
   *
   * The string is split after its first character (measured in $encoding); only that first character is
   * passed to "UTF8::strtoupper()", the rest is appended unchanged.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // run the input through the class' cleaner before it is split
      $str = self::clean($str);
    }

    // everything behind the first character stays as it is
    $rest = self::substr($str, 1, null, $encoding);
    if ($rest === false) {
      $rest = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
7355
7356
  /**
   * Alias for "UTF8::ucfirst()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7371 7
7372 7
  /**
   * Uppercase for all words in the string.
   *
   * Only the first character of each word is changed, the remaining characters keep their case
   * (unlike "mb_convert_case($str, MB_CASE_TITLE)", which also lower-cases the rest).
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words; they are copied unchanged.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // test for the empty string explicitly: a plain truthiness check would also swallow the input "0"
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // run the input through the class' cleaner before it is split into words
      $str = self::clean($str);
    }

    // without a charlist and without exceptions, plain ASCII can be handled by the native function
    $usePhpDefaultFunctions = !(bool)($charlist . implode('', $exceptions));

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty chunks only; a chunk that is exactly "0" is falsy, but must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7440
7441 1
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * Each pass runs the string through "UTF8::to_utf8()", "UTF8::html_entity_decode()", the native
   * "urldecode()" and "UTF8::fix_simple_utf8()"; with $multi_decode the passes are repeated until the
   * string no longer changes.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // rewrite "%uXXXX" escapes as numeric html-entities, so that the entity decoder below resolves them
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    $flags = Bootup::is_php('5.4') === true ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    do {
      $str_compare = $str;

      // "to_utf8()" is declared as string|string[], while "html_entity_decode()" only accepts a string:
      // $str is always a string here, the cast just pins that type for the next call
      $utf8 = (string)self::to_utf8($str);

      $str = self::fix_simple_utf8(
          urldecode(
              self::html_entity_decode($utf8, $flags)
          )
      );

    } while ($multi_decode === true && $str_compare !== $str);

    return (string)$str;
  }
7491
7492
  /**
7493
   * Returns an array that maps "urlencoded" win1252 sequences ("%XX") to their UTF-8 characters.
7494
   *
7495
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
7496
   *
7497
   * @return array
7498
   */
7499
  public static function urldecode_fix_win1252_chars()
7500
  {
7501
    return array(
7502
        '%20' => ' ',
7503
        '%21' => '!',
7504
        '%22' => '"',
7505
        '%23' => '#',
7506
        '%24' => '$',
7507
        '%25' => '%',
7508
        '%26' => '&',
7509
        '%27' => "'",
7510
        '%28' => '(',
7511
        '%29' => ')',
7512
        '%2A' => '*',
7513
        '%2B' => '+',
7514
        '%2C' => ',',
7515
        '%2D' => '-',
7516
        '%2E' => '.',
7517
        '%2F' => '/',
7518
        '%30' => '0',
7519
        '%31' => '1',
7520
        '%32' => '2',
7521
        '%33' => '3',
7522
        '%34' => '4',
7523
        '%35' => '5',
7524
        '%36' => '6',
7525
        '%37' => '7',
7526
        '%38' => '8',
7527
        '%39' => '9',
7528
        '%3A' => ':',
7529
        '%3B' => ';',
7530
        '%3C' => '<',
7531
        '%3D' => '=',
7532
        '%3E' => '>',
7533
        '%3F' => '?',
7534
        '%40' => '@',
7535
        '%41' => 'A',
7536
        '%42' => 'B',
7537
        '%43' => 'C',
7538
        '%44' => 'D',
7539
        '%45' => 'E',
7540
        '%46' => 'F',
7541
        '%47' => 'G',
7542
        '%48' => 'H',
7543
        '%49' => 'I',
7544
        '%4A' => 'J',
7545
        '%4B' => 'K',
7546
        '%4C' => 'L',
7547
        '%4D' => 'M',
7548
        '%4E' => 'N',
7549
        '%4F' => 'O',
7550
        '%50' => 'P',
7551
        '%51' => 'Q',
7552
        '%52' => 'R',
7553
        '%53' => 'S',
7554
        '%54' => 'T',
7555
        '%55' => 'U',
7556
        '%56' => 'V',
7557
        '%57' => 'W',
7558
        '%58' => 'X',
7559
        '%59' => 'Y',
7560
        '%5A' => 'Z',
7561
        '%5B' => '[',
7562
        '%5C' => '\\',
7563
        '%5D' => ']',
7564
        '%5E' => '^',
7565
        '%5F' => '_',
7566
        '%60' => '`',
7567
        '%61' => 'a',
7568
        '%62' => 'b',
7569
        '%63' => 'c',
7570
        '%64' => 'd',
7571
        '%65' => 'e',
7572
        '%66' => 'f',
7573
        '%67' => 'g',
7574
        '%68' => 'h',
7575
        '%69' => 'i',
7576
        '%6A' => 'j',
7577
        '%6B' => 'k',
7578
        '%6C' => 'l',
7579
        '%6D' => 'm',
7580
        '%6E' => 'n',
7581
        '%6F' => 'o',
7582
        '%70' => 'p',
7583
        '%71' => 'q',
7584
        '%72' => 'r',
7585
        '%73' => 's',
7586
        '%74' => 't',
7587
        '%75' => 'u',
7588
        '%76' => 'v',
7589
        '%77' => 'w',
7590
        '%78' => 'x',
7591
        '%79' => 'y',
7592
        '%7A' => 'z',
7593
        '%7B' => '{',
7594
        '%7C' => '|',
7595
        '%7D' => '}',
7596
        '%7E' => '~',
7597
        '%7F' => '',
7598
        '%80' => '`',
7599
        '%81' => '',
7600
        '%82' => '‚',
7601
        '%83' => 'ƒ',
7602
        '%84' => '„',
7603
        '%85' => '…',
7604
        '%86' => '†',
7605
        '%87' => '‡',
7606
        '%88' => 'ˆ',
7607
        '%89' => '‰',
7608
        '%8A' => 'Š',
7609
        '%8B' => '‹',
7610
        '%8C' => 'Œ',
7611
        '%8D' => '',
7612
        '%8E' => 'Ž',
7613
        '%8F' => '',
7614
        '%90' => '',
7615
        '%91' => '‘',
7616
        '%92' => '’',
7617
        '%93' => '“',
7618
        '%94' => '”',
7619
        '%95' => '•',
7620
        '%96' => '–',
7621
        '%97' => '—',
7622
        '%98' => '˜',
7623
        '%99' => '™',
7624
        '%9A' => 'š',
7625
        '%9B' => '›',
7626
        '%9C' => 'œ',
7627
        '%9D' => '',
7628
        '%9E' => 'ž',
7629
        '%9F' => 'Ÿ',
7630
        '%A0' => '',
7631
        '%A1' => '¡',
7632
        '%A2' => '¢',
7633
        '%A3' => '£',
7634
        '%A4' => '¤',
7635
        '%A5' => '¥',
7636
        '%A6' => '¦',
7637
        '%A7' => '§',
7638
        '%A8' => '¨',
7639
        '%A9' => '©',
7640
        '%AA' => 'ª',
7641
        '%AB' => '«',
7642
        '%AC' => '¬',
7643
        '%AD' => '',
7644
        '%AE' => '®',
7645
        '%AF' => '¯',
7646
        '%B0' => '°',
7647
        '%B1' => '±',
7648
        '%B2' => '²',
7649
        '%B3' => '³',
7650
        '%B4' => '´',
7651
        '%B5' => 'µ',
7652
        '%B6' => '¶',
7653
        '%B7' => '·',
7654
        '%B8' => '¸',
7655
        '%B9' => '¹',
7656
        '%BA' => 'º',
7657
        '%BB' => '»',
7658
        '%BC' => '¼',
7659
        '%BD' => '½',
7660
        '%BE' => '¾',
7661
        '%BF' => '¿',
7662
        '%C0' => 'À',
7663
        '%C1' => 'Á',
7664
        '%C2' => 'Â',
7665
        '%C3' => 'Ã',
7666
        '%C4' => 'Ä',
7667
        '%C5' => 'Å',
7668
        '%C6' => 'Æ',
7669
        '%C7' => 'Ç',
7670
        '%C8' => 'È',
7671
        '%C9' => 'É',
7672
        '%CA' => 'Ê',
7673
        '%CB' => 'Ë',
7674
        '%CC' => 'Ì',
7675
        '%CD' => 'Í',
7676
        '%CE' => 'Î',
7677
        '%CF' => 'Ï',
7678
        '%D0' => 'Ð',
7679
        '%D1' => 'Ñ',
7680
        '%D2' => 'Ò',
7681
        '%D3' => 'Ó',
7682
        '%D4' => 'Ô',
7683
        '%D5' => 'Õ',
7684
        '%D6' => 'Ö',
7685
        '%D7' => '×',
7686
        '%D8' => 'Ø',
7687
        '%D9' => 'Ù',
7688
        '%DA' => 'Ú',
7689
        '%DB' => 'Û',
7690
        '%DC' => 'Ü',
7691
        '%DD' => 'Ý',
7692
        '%DE' => 'Þ',
7693
        '%DF' => 'ß',
7694
        '%E0' => 'à',
7695
        '%E1' => 'á',
7696
        '%E2' => 'â',
7697
        '%E3' => 'ã',
7698
        '%E4' => 'ä',
7699
        '%E5' => 'å',
7700
        '%E6' => 'æ',
7701
        '%E7' => 'ç',
7702
        '%E8' => 'è',
7703
        '%E9' => 'é',
7704 6
        '%EA' => 'ê',
7705
        '%EB' => 'ë',
7706
        '%EC' => 'ì',
7707 6
        '%ED' => 'í',
7708
        '%EE' => 'î',
7709 6
        '%EF' => 'ï',
7710 3
        '%F0' => 'ð',
7711
        '%F1' => 'ñ',
7712
        '%F2' => 'ò',
7713 6
        '%F3' => 'ó',
7714
        '%F4' => 'ô',
7715 6
        '%F5' => 'õ',
7716 6
        '%F6' => 'ö',
7717
        '%F7' => '÷',
7718 6
        '%F8' => 'ø',
7719 1
        '%F9' => 'ù',
7720 1
        '%FA' => 'ú',
7721 1
        '%FB' => 'û',
7722
        '%FC' => 'ü',
7723
        '%FD' => 'ý',
7724 6
        '%FE' => 'þ',
7725
        '%FF' => 'ÿ',
7726 6
    );
7727
  }
7728
7729
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalized with "UTF8::to_utf8()", then the sequences listed in
   * "self::$UTF8_TO_WIN1252" are replaced by their table values. After that the string is compacted
   * in place: two-byte sequences become one byte (or "?" if the value is above 255), three- and
   * four-byte sequences become "?", all other bytes are copied.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // search / replace lists for the table replacement, built once and then reused
    static $win1252Search = null;
    static $win1252Replace = null;

    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    if ($win1252Search === null) {
      $win1252Search = array_keys(self::$UTF8_TO_WIN1252);
      $win1252Replace = array_values(self::$UTF8_TO_WIN1252);
    }

    $str = str_replace($win1252Search, $win1252Replace, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // length in bytes, even when mbstring overloads "strlen()"
    $len = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    // $i reads the UTF-8 bytes, $j writes the decoded bytes into the same string ($j never overtakes $i)
    $j = 0;
    for ($i = 0; $i < $len; ++$i, ++$j) {
      $highNibble = $str[$i] & "\xF0";

      if ($highNibble === "\xC0" || $highNibble === "\xD0") {
        // two-byte sequence: 5 bits from the lead byte, 6 bits from the following byte
        $upperBits = ord($str[$i] & "\x1F");
        $lowerBits = ord($str[++$i] & "\x3F");
        $c = ($upperBits << 6) | $lowerBits;

        $str[$j] = $c < 256 ? self::chr_and_parse_int($c) : '?';
      } elseif ($highNibble === "\xE0" || $highNibble === "\xF0") {
        // three- / four-byte sequences have no single-byte counterpart: write "?" and skip the
        // remaining 2 (resp. 3) bytes of the sequence
        $str[$j] = '?';
        $i += $highNibble === "\xF0" ? 3 : 2;
      } else {
        $str[$j] = $str[$i];
      }
    }

    // cut off the bytes behind the write position
    return (string)self::substr($str, 0, $j, '8BIT');
  }
7792 1
7793 1
  /**
   * Converts an ISO-8859-1 string into UTF-8.
   *
   * <p>
   * The conversion itself is done by the native "\utf8_encode()"; afterwards the keys of
   * "self::$CP1252_TO_UTF8" are replaced by their mapped values
   * (presumably to fix Windows-1252 characters - the table is defined elsewhere in this class).
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty.</p>
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);
    if (false === $encoded) {
      return '';
    }

    $encoded = (string)$encoded;

    // Without a "\xC2" byte in the result, the replacement step is skipped.
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // The replacement table is split into search / replace lists only once.
    static $searchCache = null;
    static $replaceCache = null;

    if (null === $searchCache) {
      $searchCache = array_keys(self::$CP1252_TO_UTF8);
      $replaceCache = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
7829
7830
  /**
   * Alias of "UTF8::fix_simple_utf8()", the input is passed on unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7843 1
7844
  /**
   * Returns the table of all utf8 whitespace characters ("self::$WHITESPACE_TABLE").
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               The whitespace characters as values, the type of whitespace as keys
   *               (as defined in the URL above).
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7860 1
7861
  /**
   * Cuts a string after a given number of words.
   *
   * <p>
   * A "word" is a run of non-whitespace characters. If the string was cut, trailing whitespace
   * is removed and "$strAddOn" is appended; otherwise the string is returned unchanged.
   * </p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words, values below 1 result in an empty string.</p>
   * @param string $strAddOn <p>Appended to the string, if it was stripped.</p>
   *
   * @return string
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $limit = (int)$limit;

    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 to $limit words (incl. their trailing whitespace)
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // Only if the match is shorter than the input, something was really cut off.
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7897 8
7898
  /**
   * Wraps a string to a given number of characters (not bytes).
   *
   * <p>
   * The string is split into characters via "UTF8::split()" and mirrored into a mask with exactly
   * one single-byte placeholder per character. The native "wordwrap()" runs on that mask and the
   * positions of its line breaks are transferred back to the real characters.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column,
   *                or an empty string if "$str" or "$break" is empty.
   *                </p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    // Build the mask: "#" for an existing break, " " for a space, "?" for any other character.
    $mask = '';
    $chars = array();
    foreach (explode($break, $str) as $segmentIndex => $segment) {
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = -1;

    // Walk from "#" to "#" through the wrapped mask.
    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {
      // Move all characters in front of this break into the result.
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // The break replaced a space or an existing break: drop that character.
      // Otherwise the break was inserted by a forced cut and nothing is dropped.
      $current = $chars[$charIndex];
      if ($break === $current || ' ' === $current) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // Everything behind the last break is appended as it is.
    return $wrapped . implode('', $chars);
  }
7965
7966
  /**
   * Returns the list of Unicode White Space characters ("self::$WHITESPACE").
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7975
7976
}
7977