Completed
Push — master ( 7733a3...6963af )
by Lars
05:10
created

UTF8::hex_to_chr()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
814
   * @var null|array
815
   */
816
  private static $ORD = null;
817
818
  /**
819
   * @var null|array
820
   */
821
  private static $CHR = null;
822
823
  /**
   * Constructor.
   *
   * Creating an instance triggers the environment feature detection
   * by delegating to UTF8::checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
830
831
  /**
   * Fetch the character at a given position, similar to $str[1] but
   * delegating the extraction to UTF8::substr().
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-or-greater position of the character to return.</p>
   *
   * @return string <p>The single (multi-byte) character, or an empty string
   *                when the input is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $string = (string)$str;

    // nothing to look up in an empty string
    if ($string === '') {
      return '';
    }

    $index = (int)$pos;

    // negative positions are rejected instead of counting from the end
    return $index < 0 ? '' : (string)self::substr($string, $index, 1);
  }
855
856
  /**
   * Make sure a string starts with the UTF-8 byte order mark.
   *
   * INFO: When UTF8::string_has_bom() does not report false for the input,
   *       the input is handed back unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string, prefixed with the BOM if it was missing.</p>
   */
  public static function add_bom_to_string($str)
  {
    // already carries a BOM -> leave it alone
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
873
874
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: leading zero bits are
   * ignored and the remaining bits are left-padded to whole bytes, so both
   * "1100001" and "01100001" produce "a".
   *
   * INFO: The conversion is done byte-wise instead of via base_convert(),
   *       because base_convert() works on floats and silently loses
   *       precision on long inputs, and because dropped leading zero
   *       nibbles made pack('H*', ...) mis-align the result
   *       (e.g. "1010" became "\xa0" instead of "\x0a").
   *
   * @param mixed $bin <p>String consisting of the characters 1|0; any other
   *                   character is ignored.</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty,
   *                non-string or all-zero input.</p>
   */
  public static function binary_to_str($bin)
  {
    // only non-empty strings can carry binary digits
    if (!is_string($bin) || $bin === '') {
      return '';
    }

    // ignore everything that is not a binary digit, then drop leading zeros
    $bits = ltrim((string)preg_replace('/[^01]/', '', $bin), '0');

    // a value of zero has always been mapped to an empty string
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8 so that every chunk is exactly one byte
    $missingBits = (8 - (strlen($bits) % 8)) % 8;
    if ($missingBits > 0) {
      $bits = str_repeat('0', $missingBits) . $bits;
    }

    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= \chr(bindec($octet));
    }

    return $bytes;
  }
894
895
  /**
   * Get the UTF-8 byte order mark.
   *
   * INFO: see UTF8::$BOM for other marks, e.g. the UTF-16 and UTF-32 BOM values
   *
   * @return string <p>The three bytes 0xEF 0xBB 0xBF.</p>
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
906
907
  /**
   * Alias of UTF8::chr_map(): forwards both arguments unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback to apply.</p>
   * @param string       $str      <p>The string whose characters are passed to the callback.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
921
922
  /**
   * Detect once which UTF-8 related extensions / features are available
   * and remember the result in self::$SUPPORT.
   *
   * INFO: There is no need to call this manually; the methods that depend
   *       on the result trigger it themselves.
   */
  public static function checkForSupport()
  {
    // the detection has already been done -> nothing to do
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();
    self::$SUPPORT['mbstring_func_overload'] = self::mbstring_overloaded();

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the transliterator id list is only available with "intl"; otherwise keep an empty list
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? transliterator_list_ids()
        : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
958
959
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per "code point + encoding" in a static cache.
   * If "\IntlChar" is available it does the conversion; otherwise the
   * UTF-8 bytes are assembled by hand from the self::$CHR lookup table
   * (presumably byte value => single-byte string; verify against
   * UTF8::getData('chr')).
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input
   *                     (e.g. a non-integer or out-of-range code point).</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $str = \IntlChar::chr($code_point);

      // "\IntlChar::chr()" reports failure as null: keep that null instead of
      // feeding it into the encoding conversion
      if ($str !== null && $encoding !== 'UTF-8') {
        $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
      }

      // add into static cache
      $CHAR_CACHE[$cacheKey] = $str;

      return $str;
    }

    // check type and range of code_point, only if there is no support for "\IntlChar":
    // anything outside of U+0000 - U+10FFFF would index the lookup table with an
    // offset that does not belong to a valid UTF-8 sequence
    if (
        (int)$code_point !== $code_point
        ||
        $code_point < 0
        ||
        $code_point > 0x10FFFF
    ) {
      $CHAR_CACHE[$cacheKey] = null;

      return null;
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::$CHR[$code_point];
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::$CHR[($code_point >> 6) + 0xC0] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::$CHR[($code_point >> 12) + 0xE0] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::$CHR[($code_point >> 18) + 0xF0] .
             self::$CHR[(($code_point >> 12) & 0x3F) + 0x80] .
             self::$CHR[(($code_point >> 6) & 0x3F) + 0x80] .
             self::$CHR[($code_point & 0x3F) + 0x80];
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1044
1045
  /**
   * Run a callback on every character of a string.
   *
   * The string is broken up with UTF8::split() and the pieces are passed
   * to the callback via array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The callback results, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1059
1060
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character; empty for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $sizes = array();

    // measure each character produced by split() in bytes ('8BIT')
    foreach (self::split($str) as $index => $character) {
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
1087
1088
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the character (1 to 4); the
   * payload bits of the lead byte and of each following byte are then combined
   * into one integer.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decimal code of the character, 0 for an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // An empty string has no lead byte: return 0 instead of reading an
    // undefined string offset.
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // A truncated sequence has no byte at this position; treat it as
      // contributing zero bits rather than reading an undefined offset.
      $continuation = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($continuation & ~0x80);
    }

    return $code;
  }
1127
1128
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // the literal entity "&#0;" is looked up as an empty string
    $lookup = ($char === '&#0;') ? '' : $char;

    $codePoint = self::ord($lookup);

    return self::int_to_hex($codePoint, $pfix);
  }
1150
1151
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the character.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1164
1165
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from UTF8::split() and are glued together with implode(),
   * so the line ending is placed between chunks.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1178
1179
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in order: strip bytes that are not part of a well-formed byte
   * sequence, remove the diamond question mark, remove invisible characters,
   * then apply the optional whitespace / MS Word / BOM handling.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    $regx = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // Only group 1 (the well-formed sequences) is kept. preg_replace() returns
    // null when PCRE fails; the cast makes sure the following steps always
    // receive a string (an empty one in that case) instead of null.
    $str = (string)preg_replace($regx, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1227
1228
  /**
   * Cleans up a string so that only printable UTF-8 chars remain + fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, or an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $fixed = self::fix_simple_utf8($str);

    // then clean() with:
    // - $remove_bom              = true
    // - $normalize_whitespace    = true
    // - $normalize_msword        = false
    // - $keep_non_breaking_space = true
    // (clean() itself also drops non UTF-8 symbols, the diamond question mark
    // and invisible characters)
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
1255
1256
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split first; anything else is mapped as given
    $chars = $arg;
    if (is_string($chars) === true) {
      $chars = self::split($chars);
    }

    $toCodePoint = array('\\voku\\helper\\UTF8', 'ord');
    $codePoints = array_map($toCodePoint, $chars);

    if (!$u_style) {
      return $codePoints;
    }

    // convert every code point with UTF8::int_to_hex()
    $toHex = array('\\voku\\helper\\UTF8', 'int_to_hex');

    return array_map($toHex, $codePoints);
  }
1293
1294
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split into chunks of length 1, then tally identical entries
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1307
1308
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric entity ("&#<int>;") and decoded with
   * UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1325
1326
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged when it or the encoding is empty, when the
   * current encoding cannot be detected, when $force is false and the detected
   * encoding already equals the target, or when the conversion yields no
   * usable string.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    if (
        $encodingDetected !== false
        &&
        (
            $force === true
            ||
            $encodingDetected !== $encoding
        )
    ) {

      if (
          $encoding === 'UTF-8'
          &&
          (
              $force === true
              || $encodingDetected === 'UTF-8'
              || $encodingDetected === 'WINDOWS-1252'
              || $encodingDetected === 'ISO-8859-1'
          )
      ) {
        return self::to_utf8($str);
      }

      if (
          $encoding === 'ISO-8859-1'
          &&
          (
              $force === true
              || $encodingDetected === 'ISO-8859-1'
              || $encodingDetected === 'WINDOWS-1252'
              || $encodingDetected === 'UTF-8'
          )
      ) {
        return self::to_iso8859($str);
      }

      // warn, but still attempt the conversion below
      if (
          $encoding !== 'UTF-8'
          &&
          $encoding !== 'WINDOWS-1252'
          &&
          self::$SUPPORT['mbstring'] === false
      ) {
        trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      $strEncoded = \mb_convert_encoding(
          $str,
          $encoding,
          $encodingDetected
      );

      // Check for a usable string explicitly: a plain truthiness test would
      // also throw away the valid conversion result "0".
      if (is_string($strEncoded) === true && $strEncoded !== '') {
        return $strEncoded;
      }
    }

    return $str;
  }
1417
1418
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>Name of the file to read. It is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before use.</p>
   * @param int|false     $flags         [optional] <p>Forwarded as second argument of the native
   *                                     file_get_contents(); an empty value is forwarded as
   *                                     <b>false</b>. The native function documents
   *                                     FILE_USE_INCLUDE_PATH (search for the file in the
   *                                     include path) for this position.</p>
   * @param resource|null $context       [optional] <p>A valid context resource created with
   *                                     stream_context_create. When it is null and $timeout is
   *                                     non-zero, a context carrying the "http" timeout is created.</p>
   * @param int|null      $offset        [optional] <p>The offset where the reading starts
   *                                     (null is treated as 0).</p>
   * @param int|null      $maxLength     [optional] <p>Maximum length of data read. Only forwarded when
   *                                     it is an integer; otherwise the file is read until the end.</p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING rewrites some characters and is
    // deprecated in newer PHP versions - confirm this is still wanted for paths.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // create a default context with the http timeout, if the caller gave none
    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    $useFlags = $flags ? $flags : false;
    $startAt = ($offset === null) ? 0 : $offset;

    if (is_int($maxLength) === true) {
      $data = file_get_contents($filename, $useFlags, $context, $startAt, $maxLength);
    } else {
      $data = file_get_contents($filename, $useFlags, $context, $startAt);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // convert to UTF-8 (without forcing) and clean the result afterwards
    $converted = self::encode('UTF-8', $data, false);

    return self::cleanup($converted);
  }
1543
1544
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // file_get_contents() returns false on failure: an unreadable file cannot
    // be reported as starting with a BOM, and false is not a string to inspect.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1556
  /**
1557
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1558
   *
1559
   * @param mixed  $var
1560
   * @param int    $normalization_form
1561
   * @param string $leading_combining
1562
   *
1563
   * @return mixed
1564
   */
1565 9
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
1566
  {
1567 9
    switch (gettype($var)) {
1568 9 View Code Duplication
      case 'array':
1569 3
        foreach ($var as $k => $v) {
1570
          /** @noinspection AlterInForeachInspection */
1571 3
          $var[$k] = self::filter($v, $normalization_form, $leading_combining);
1572 3
        }
1573 3
        break;
1574 9 View Code Duplication
      case 'object':
1575 2
        foreach ($var as $k => $v) {
1576 2
          $var->{$k} = self::filter($v, $normalization_form, $leading_combining);
1577 2
        }
1578 2
        break;
1579 9
      case 'string':
0 ignored issues
show
Coding Style introduced by
The case body in a switch statement must start on the line following the statement.

According to the PSR-2, the body of a case statement must start on the line immediately following the case statement.

switch ($expr) {
case "A":
    doSomething(); //right
    break;
case "B":

    doSomethingElse(); //wrong
    break;

}

To learn more about the PSR-2 coding standard, please refer to the PHP-Fig.

Loading history...
1580
1581 8
        if (false !== strpos($var, "\r")) {
1582
          // Workaround https://bugs.php.net/65732
1583 2
          $var = str_replace(array("\r\n", "\r"), "\n", $var);
1584 2
        }
1585
1586 8
        if (self::is_ascii($var) === false) {
1587
          /** @noinspection PhpUndefinedClassInspection */
1588 8
          if (\Normalizer::isNormalized($var, $normalization_form)) {
1589 6
            $n = '-';
1590 6
          } else {
1591
            /** @noinspection PhpUndefinedClassInspection */
1592 6
            $n = \Normalizer::normalize($var, $normalization_form);
1593
1594 6
            if (isset($n[0])) {
1595 3
              $var = $n;
1596 3
            } else {
1597 5
              $var = self::encode('UTF-8', $var, true);
1598
            }
1599
          }
1600
1601
          if (
1602 8
              $var[0] >= "\x80"
1603 8
              &&
1604 6
              isset($n[0], $leading_combining[0])
1605 8
              &&
1606 5
              preg_match('/^\p{Mn}/u', $var)
1607 8
          ) {
1608
            // Prevent leading combining chars
1609
            // for NFC-safe concatenations.
1610 2
            $var = $leading_combining . $var;
1611 2
          }
1612 8
        }
1613
1614 8
        break;
1615 9
    }
1616
1617 9
    return $var;
1618
  }
1619
1620
  /**
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it; the
   * result of the native call is then passed through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when the caller actually passed a fourth argument.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if it was really given by the caller
    $var = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1660
1661
  /**
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables and optionally filters them; the result of the
   * native call is then passed through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails.
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // $definition and $add_empty are only forwarded when at least two
    // arguments were really passed by the caller
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    return self::filter($values);
  }
1708
1709
  /**
1710
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1711
   *
1712
   * Filters a variable with a specified filter
1713
   *
1714
   * @link  http://php.net/manual/en/function.filter-var.php
1715
   *
1716
   * @param mixed $variable <p>
1717
   *                        Value to filter.
1718
   *                        </p>
1719
   * @param int   $filter   [optional] <p>
1720
   *                        The ID of the filter to apply. The
1721
   *                        manual page lists the available filters.
1722
   *                        </p>
1723
   * @param mixed $options  [optional] <p>
1724
   *                        Associative array of options or bitwise disjunction of flags. If filter
1725
   *                        accepts options, flags can be provided in "flags" field of array. For
1726
   *                        the "callback" filter, callable type should be passed. The
1727
   *                        callback must accept one argument, the value to be filtered, and return
1728
   *                        the value after filtering/sanitizing it.
1729
   *                        </p>
1730
   *                        <p>
1731
   *                        <code>
1732
   *                        // for filters that accept options, use this format
1733
   *                        $options = array(
1734
   *                        'options' => array(
1735
   *                        'default' => 3, // value to return if the filter fails
1736
   *                        // other options here
1737
   *                        'min_range' => 0
1738
   *                        ),
1739
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1740
   *                        );
1741
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1742
   *                        // for filter that only accept flags, you can pass them directly
1743
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1744
   *                        // for filter that only accept flags, you can also pass as an array
1745
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1746
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1747
   *                        // callback validate filter
1748
   *                        function foo($value)
1749
   *                        {
1750
   *                        // Expected format: Surname, GivenNames
1751
   *                        if (strpos($value, ", ") === false) return false;
1752
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1753
   *                        $empty = (empty($surname) || empty($givennames));
1754
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1755
   *                        if ($empty || $notstrings) {
1756
   *                        return false;
1757
   *                        } else {
1758
   *                        return $value;
1759
   *                        }
1760
   *                        }
1761
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1762
   *                        </code>
1763
   *                        </p>
1764
   *
1765
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1766
   * @since 5.2.0
1767
   */
1768 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller really supplied a third argument,
    // so the native function is invoked with the same arity as this wrapper.
    $optionsWerePassed = func_num_args() >= 3;

    $filtered = $optionsWerePassed
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // hand the native result over to the class' own filter() for post-processing
    return self::filter($filtered);
  }
1778
1779
  /**
1780
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1781
   *
1782
   * Gets multiple variables and optionally filters them
1783
   *
1784
   * @link  http://php.net/manual/en/function.filter-var-array.php
1785
   *
1786
   * @param array $data       <p>
1787
   *                          An array with string keys containing the data to filter.
1788
   *                          </p>
1789
   * @param mixed $definition [optional] <p>
1790
   *                          An array defining the arguments. A valid key is a string
1791
   *                          containing a variable name and a valid value is either a
1792
   *                          filter type, or an
1793
   *                          array optionally specifying the filter, flags and options.
1794
   *                          If the value is an array, valid keys are filter
1795
   *                          which specifies the filter type,
1796
   *                          flags which specifies any flags that apply to the
1797
   *                          filter, and options which specifies any options that
1798
   *                          apply to the filter. See the example below for a better understanding.
1799
   *                          </p>
1800
   *                          <p>
1801
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1802
   *                          input array are filtered by this filter.
1803
   *                          </p>
1804
   * @param bool  $add_empty  [optional] <p>
1805
   *                          Add missing keys as <b>NULL</b> to the return value.
1806
   *                          </p>
1807
   *
1808
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1809
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1810
   * the variable is not set.
1811
   * @since 5.2.0
1812
   */
1813 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With nothing but $data supplied, use the one-argument form of the native
    // function; otherwise pass the definition and the add-empty flag through.
    $onlyDataGiven = func_num_args() < 2;

    $result = $onlyDataGiven
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // hand the native result over to the class' own filter() for post-processing
    return self::filter($result);
  }
1823
1824
  /**
1825
   * Check if the number of unicode characters is not more than the specified integer.
1826
   *
1827
   * @param string $str      The original string to be checked.
1828
   * @param int    $box_size The size in number of chars to be checked against string.
1829
   *
1830
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1831
   */
1832 1
  public static function fits_inside($str, $box_size)
  {
    // length as reported by the class' own strlen()
    $charCount = self::strlen($str);

    return $box_size >= $charCount;
  }
1836
1837
  /**
1838
   * Try to fix simple broken UTF-8 strings.
1839
   *
1840
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1841
   *
1842
   * If you received a UTF-8 string that was converted from Windows-1252 as if it was ISO-8859-1
1843
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1844
   * See: http://en.wikipedia.org/wiki/Windows-1252
1845
   *
1846
   * @param string $str <p>The input string</p>
1847
   *
1848
   * @return string
1849
   */
1850 27 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $input = (string)$str;

    // nothing to repair in an empty string
    if ($input === '') {
      return '';
    }

    // The search/replace lists are derived once from the fix-table and then
    // re-used by every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $input);
  }
1869
1870
  /**
1871
   * Fix a double (or multiple) encoded UTF8 string.
1872
   *
1873
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1874
   *
1875
   * @return string|string[] <p>Will return the fixed input-"array" or
1876
   *                         the fixed input-"string".</p>
1877
   */
1878 1
  public static function fix_utf8($str)
  {
    // arrays are repaired element by element (recursively), keys and order are kept
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Decode and re-encode over and over until a pass no longer changes the
    // value; an empty string never enters the loop.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;

      $decoded = self::utf8_decode($str, true);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1902
1903
  /**
1904
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1905
   *
1906
   * @param string $char
1907
   *
1908
   * @return string <p>'RTL' or 'LTR'</p>
1909
   */
1910 1
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class; any other code falls
      // through to the table lookup below
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = static::chr_to_decimal($char);

    // everything outside of 0x5be ... 0x10b7f is reported as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // single code points that are right-to-left (compared strictly)
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // inclusive [first, last] code point ranges that are right-to-left
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (in_array($c, $rtlCodePoints, true)) {
      return 'RTL';
    }

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2024
2025
  /**
2026
   * Get data from "/data/*.php".
2027
   *
2028
   * @param string $file
2029
   *
2030
   * @return bool|string|array|int <p>Will return false on error.</p>
2031
   */
2032 5
  private static function getData($file)
  {
    // resolve the name to a PHP file inside the "data" directory next to this class
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // the result is whatever the data file itself returns
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2042
2043
  /**
2044
   * Check for php-support.
2045
   *
2046
   * @param string|null $key
2047
   *
2048
   * @return mixed <p>Return the full support-"array", if $key === null<br>
2049
   *               return bool-value, if $key is used and available<br>
2050
   *               otherwise return null</p>
2051
   */
2052 19
  public static function getSupportInfo($key = null)
  {
    // make sure the support-table was filled before it is read
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key: hand out the complete table
    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null-valued) keys yield null
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2068
2069
  /**
2070
   * alias for "UTF8::string_has_bom()"
2071
   *
2072
   * @see UTF8::string_has_bom()
2073
   *
2074
   * @param string $str
2075
   *
2076
   * @return bool
2077
   *
2078
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2079
   */
2080
  public static function hasBom($str)
  {
    // deprecated alias: the argument is forwarded unchanged
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2084
2085
  /**
2086
   * Converts a hexadecimal-value into an UTF-8 character.
2087
   *
2088
   * @param string $hexdec <p>The hexadecimal value.</p>
2089
   *
2090
   * @return string|false <p>One single UTF-8 character.</p>
2091
   */
2092 2
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> decimal code point -> character
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2096
2097
  /**
2098
   * Converts hexadecimal U+xxxx code point representation to integer.
2099
   *
2100
   * INFO: opposite to UTF8::int_to_hex()
2101
   *
2102
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2103
   *
2104
   * @return int|false <p>The code point, or false on failure.</p>
2105
   */
2106 1
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    // an empty input can never be a code point
    if (!isset($hexDec[0])) {
      return false;
    }

    // Accepted form: an optional "\u" or "U+" prefix followed by 4 to 6
    // hexadecimal digits (case-insensitive). The digit class is limited to
    // [0-9a-f]: with a broader letter class, input such as "U+zzzz" matched
    // and intval() turned it into 0 instead of reporting a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2120
2121
  /**
2122
   * alias for "UTF8::html_entity_decode()"
2123
   *
2124
   * @see UTF8::html_entity_decode()
2125
   *
2126
   * @param string $str
2127
   * @param int    $flags
2128
   * @param string $encoding
2129
   *
2130
   * @return string
2131
   */
2132 1
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // plain alias: all three arguments are forwarded unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2136
2137
  /**
2138
   * Converts a UTF-8 string to a series of HTML numbered entities.
2139
   *
2140
   * INFO: opposite to UTF8::html_decode()
2141
   *
2142
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2143
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2144
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2145
   *
2146
   * @return string <p>HTML numbered entities.</p>
2147
   */
2148 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: 0x80 leaves the ASCII range untouched
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // A convmap consists of groups of exactly four integers
      // (start, end, offset, mask). A stray fifth element is ignored by old
      // PHP versions, but makes PHP 8 throw a ValueError, so only one complete
      // group is passed.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without mbstring: encode the string character by character
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2186
2187
  /**
2188
   * UTF-8 version of html_entity_decode()
2189
   *
2190
   * The reason we are not using html_entity_decode() by itself is because
2191
   * while it is not technically correct to leave out the semicolon
2192
   * at the end of an entity most browsers will still interpret the entity
2193
   * correctly. html_entity_decode() does not convert entities without
2194
   * semicolons, so we are left with our own little solution here. Bummer.
2195
   *
2196
   * Convert all HTML entities to their applicable characters
2197
   *
2198
   * INFO: opposite to UTF8::html_encode()
2199
   *
2200
   * @link http://php.net/manual/en/function.html-entity-decode.php
2201
   *
2202
   * @param string $str      <p>
2203
   *                         The input string.
2204
   *                         </p>
2205
   * @param int    $flags    [optional] <p>
2206
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2207
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2208
   *                         <table>
2209
   *                         Available <i>flags</i> constants
2210
   *                         <tr valign="top">
2211
   *                         <td>Constant Name</td>
2212
   *                         <td>Description</td>
2213
   *                         </tr>
2214
   *                         <tr valign="top">
2215
   *                         <td><b>ENT_COMPAT</b></td>
2216
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2217
   *                         </tr>
2218
   *                         <tr valign="top">
2219
   *                         <td><b>ENT_QUOTES</b></td>
2220
   *                         <td>Will convert both double and single quotes.</td>
2221
   *                         </tr>
2222
   *                         <tr valign="top">
2223
   *                         <td><b>ENT_NOQUOTES</b></td>
2224
   *                         <td>Will leave both double and single quotes unconverted.</td>
2225
   *                         </tr>
2226
   *                         <tr valign="top">
2227
   *                         <td><b>ENT_HTML401</b></td>
2228
   *                         <td>
2229
   *                         Handle code as HTML 4.01.
2230
   *                         </td>
2231
   *                         </tr>
2232
   *                         <tr valign="top">
2233
   *                         <td><b>ENT_XML1</b></td>
2234
   *                         <td>
2235
   *                         Handle code as XML 1.
2236
   *                         </td>
2237
   *                         </tr>
2238
   *                         <tr valign="top">
2239
   *                         <td><b>ENT_XHTML</b></td>
2240
   *                         <td>
2241
   *                         Handle code as XHTML.
2242
   *                         </td>
2243
   *                         </tr>
2244
   *                         <tr valign="top">
2245
   *                         <td><b>ENT_HTML5</b></td>
2246
   *                         <td>
2247
   *                         Handle code as HTML 5.
2248
   *                         </td>
2249
   *                         </tr>
2250
   *                         </table>
2251
   *                         </p>
2252
   * @param string $encoding [optional] <p>Encoding to use.</p>
2253
   *
2254
   * @return string <p>The decoded string.</p>
2255
   */
2256 17
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // fewer than four bytes are returned untouched
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Without an ampersand, or with neither "&#" nor ";" present, the string is
    // returned as it is.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252') {
      // The support-table is filled lazily; make sure it exists before the
      // "mbstring" entry is read, like the other readers of the table do.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Repeat until a pass leaves the string unchanged, so that entities which
    // only become visible after a decoding step are decoded as well.
    do {
      $str_compare = $str;

      // decimal entities first; entities that decode to a quote character are kept as entity
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities first)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2331
2332
  /**
2333
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2334
   *
2335
   * @link http://php.net/manual/en/function.htmlentities.php
2336
   *
2337
   * @param string $str           <p>
2338
   *                              The input string.
2339
   *                              </p>
2340
   * @param int    $flags         [optional] <p>
2341
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2342
   *                              invalid code unit sequences and the used document type. The default is
2343
   *                              ENT_COMPAT | ENT_HTML401.
2344
   *                              <table>
2345
   *                              Available <i>flags</i> constants
2346
   *                              <tr valign="top">
2347
   *                              <td>Constant Name</td>
2348
   *                              <td>Description</td>
2349
   *                              </tr>
2350
   *                              <tr valign="top">
2351
   *                              <td><b>ENT_COMPAT</b></td>
2352
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2353
   *                              </tr>
2354
   *                              <tr valign="top">
2355
   *                              <td><b>ENT_QUOTES</b></td>
2356
   *                              <td>Will convert both double and single quotes.</td>
2357
   *                              </tr>
2358
   *                              <tr valign="top">
2359
   *                              <td><b>ENT_NOQUOTES</b></td>
2360
   *                              <td>Will leave both double and single quotes unconverted.</td>
2361
   *                              </tr>
2362
   *                              <tr valign="top">
2363
   *                              <td><b>ENT_IGNORE</b></td>
2364
   *                              <td>
2365
   *                              Silently discard invalid code unit sequences instead of returning
2366
   *                              an empty string. Using this flag is discouraged as it
2367
   *                              may have security implications.
2368
   *                              </td>
2369
   *                              </tr>
2370
   *                              <tr valign="top">
2371
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2372
   *                              <td>
2373
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2374
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2375
   *                              </td>
2376
   *                              </tr>
2377
   *                              <tr valign="top">
2378
   *                              <td><b>ENT_DISALLOWED</b></td>
2379
   *                              <td>
2380
   *                              Replace invalid code points for the given document type with a
2381
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2382
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2383
   *                              instance, to ensure the well-formedness of XML documents with
2384
   *                              embedded external content.
2385
   *                              </td>
2386
   *                              </tr>
2387
   *                              <tr valign="top">
2388
   *                              <td><b>ENT_HTML401</b></td>
2389
   *                              <td>
2390
   *                              Handle code as HTML 4.01.
2391
   *                              </td>
2392
   *                              </tr>
2393
   *                              <tr valign="top">
2394
   *                              <td><b>ENT_XML1</b></td>
2395
   *                              <td>
2396
   *                              Handle code as XML 1.
2397
   *                              </td>
2398
   *                              </tr>
2399
   *                              <tr valign="top">
2400
   *                              <td><b>ENT_XHTML</b></td>
2401
   *                              <td>
2402
   *                              Handle code as XHTML.
2403
   *                              </td>
2404
   *                              </tr>
2405
   *                              <tr valign="top">
2406
   *                              <td><b>ENT_HTML5</b></td>
2407
   *                              <td>
2408
   *                              Handle code as HTML 5.
2409
   *                              </td>
2410
   *                              </tr>
2411
   *                              </table>
2412
   *                              </p>
2413
   * @param string $encoding      [optional] <p>
2414
   *                              Like <b>htmlspecialchars</b>,
2415
   *                              <b>htmlentities</b> takes an optional third argument
2416
   *                              <i>encoding</i> which defines encoding used in
2417
   *                              conversion.
2418
   *                              Although this argument is technically optional, you are highly
2419
   *                              encouraged to specify the correct value for your code.
2420
   *                              </p>
2421
   * @param bool   $double_encode [optional] <p>
2422
   *                              When <i>double_encode</i> is turned off PHP will not
2423
   *                              encode existing html entities. The default is to convert everything.
2424
   *                              </p>
2425
   *
2426
   *
2427
   * @return string the encoded string.
2428
   * </p>
2429
   * <p>
2430
   * If the input <i>string</i> contains an invalid code unit
2431
   * sequence within the given <i>encoding</i> an empty string
2432
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2433
   * <b>ENT_SUBSTITUTE</b> flags are set.
2434
   */
2435 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native htmlentities() leaves backslashes alone, so they are replaced
     * with their numeric entity right after the native call.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes that is still
    // present and map it to its numbered entity.
    $entityByChar = array();
    foreach (self::chr_size_list($encoded) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($encoded, $position);

      if (!isset($entityByChar[$char])) {
        $entityByChar[$char] = self::html_encode($char);
      }
    }

    return str_replace(array_keys($entityByChar), array_values($entityByChar), $encoded);
  }
2473
2474
  /**
2475
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2476
   *
2477
   * INFO: Take a look at "UTF8::htmlentities()"
2478
   *
2479
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2480
   *
2481
   * @param string $str           <p>
2482
   *                              The string being converted.
2483
   *                              </p>
2484
   * @param int    $flags         [optional] <p>
2485
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2486
   *                              invalid code unit sequences and the used document type. The default is
2487
   *                              ENT_COMPAT | ENT_HTML401.
2488
   *                              <table>
2489
   *                              Available <i>flags</i> constants
2490
   *                              <tr valign="top">
2491
   *                              <td>Constant Name</td>
2492
   *                              <td>Description</td>
2493
   *                              </tr>
2494
   *                              <tr valign="top">
2495
   *                              <td><b>ENT_COMPAT</b></td>
2496
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2497
   *                              </tr>
2498
   *                              <tr valign="top">
2499
   *                              <td><b>ENT_QUOTES</b></td>
2500
   *                              <td>Will convert both double and single quotes.</td>
2501
   *                              </tr>
2502
   *                              <tr valign="top">
2503
   *                              <td><b>ENT_NOQUOTES</b></td>
2504
   *                              <td>Will leave both double and single quotes unconverted.</td>
2505
   *                              </tr>
2506
   *                              <tr valign="top">
2507
   *                              <td><b>ENT_IGNORE</b></td>
2508
   *                              <td>
2509
   *                              Silently discard invalid code unit sequences instead of returning
2510
   *                              an empty string. Using this flag is discouraged as it
2511
   *                              may have security implications.
2512
   *                              </td>
2513
   *                              </tr>
2514
   *                              <tr valign="top">
2515
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2516
   *                              <td>
2517
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2518
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2519
   *                              </td>
2520
   *                              </tr>
2521
   *                              <tr valign="top">
2522
   *                              <td><b>ENT_DISALLOWED</b></td>
2523
   *                              <td>
2524
   *                              Replace invalid code points for the given document type with a
2525
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2526
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2527
   *                              instance, to ensure the well-formedness of XML documents with
2528
   *                              embedded external content.
2529
   *                              </td>
2530
   *                              </tr>
2531
   *                              <tr valign="top">
2532
   *                              <td><b>ENT_HTML401</b></td>
2533
   *                              <td>
2534
   *                              Handle code as HTML 4.01.
2535
   *                              </td>
2536
   *                              </tr>
2537
   *                              <tr valign="top">
2538
   *                              <td><b>ENT_XML1</b></td>
2539
   *                              <td>
2540
   *                              Handle code as XML 1.
2541
   *                              </td>
2542
   *                              </tr>
2543
   *                              <tr valign="top">
2544
   *                              <td><b>ENT_XHTML</b></td>
2545
   *                              <td>
2546
   *                              Handle code as XHTML.
2547
   *                              </td>
2548
   *                              </tr>
2549
   *                              <tr valign="top">
2550
   *                              <td><b>ENT_HTML5</b></td>
2551
   *                              <td>
2552
   *                              Handle code as HTML 5.
2553
   *                              </td>
2554
   *                              </tr>
2555
   *                              </table>
2556
   *                              </p>
2557
   * @param string $encoding      [optional] <p>
2558
   *                              Defines encoding used in conversion.
2559
   *                              </p>
2560
   *                              <p>
2561
   *                              For the purposes of this function, the encodings
2562
   *                              ISO-8859-1, ISO-8859-15,
2563
   *                              UTF-8, cp866,
2564
   *                              cp1251, cp1252, and
2565
   *                              KOI8-R are effectively equivalent, provided the
2566
   *                              <i>string</i> itself is valid for the encoding, as
2567
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2568
   *                              the same positions in all of these encodings.
2569
   *                              </p>
2570
   * @param bool   $double_encode [optional] <p>
2571
   *                              When <i>double_encode</i> is turned off PHP will not
2572
   *                              encode existing html entities, the default is to convert everything.
2573
   *                              </p>
2574
   *
2575
   * @return string The converted string.
2576
   * </p>
2577
   * <p>
2578
   * If the input <i>string</i> contains an invalid code unit
2579
   * sequence within the given <i>encoding</i> an empty string
2580
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2581
   * <b>ENT_SUBSTITUTE</b> flags are set.
2582
   */
2583 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The default charset is handed over as-is; every other value is first
    // passed through normalize_encoding().
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // Delegates to the native (global) htmlspecialchars().
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2591
2592
  /**
2593
   * Checks whether iconv is available on the server.
2594
   *
2595
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2596
   */
2597 1
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a boolean.
    $return = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6, so it is only
    //       used on older runtimes. The function_exists() guard prevents a fatal
    //       "undefined function" error when iconv is not available at all.
    if (
        Bootup::is_php('5.6') === false
        &&
        function_exists('iconv_set_encoding') === true
    ) {
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2612
2613
  /**
2614
   * alias for "UTF8::decimal_to_chr()"
2615
   *
2616
   * @see UTF8::decimal_to_chr()
2617
   *
2618
   * @param mixed $int
2619
   *
2620
   * @return string
2621
   */
2622 2
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself is done by decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2626
2627
  /**
2628
   * Converts Integer to hexadecimal U+xxxx code point representation.
2629
   *
2630
   * INFO: opposite to UTF8::hex_to_int()
2631
   *
2632
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2633
   * @param string $pfix [optional]
2634
   *
2635
   * @return string <p>The code point, or empty string on failure.</p>
2636
   */
2637 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a real integer counts as a failure.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros up to four hex digits; longer values are kept unchanged.
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2649
2650
  /**
2651
   * Checks whether intl-char is available on the server.
2652
   *
2653
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2654
   */
2655 1
  public static function intlChar_loaded()
  {
    // The class lookup is skipped unless Bootup::is_php('7.0') reports true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2663
2664
  /**
2665
   * Checks whether intl is available on the server.
2666
   *
2667
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2668
   */
2669 4
  public static function intl_loaded()
  {
    // extension_loaded() reports whether the "intl" extension is active.
    $isLoaded = extension_loaded('intl');

    return $isLoaded === true;
  }
2673
2674
  /**
2675
   * alias for "UTF8::is_ascii()"
2676
   *
2677
   * @see UTF8::is_ascii()
2678
   *
2679
   * @param string $str
2680
   *
2681
   * @return boolean
2682
   *
2683
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2684
   */
2685
  public static function isAscii($str)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_ascii().
    $isAscii = self::is_ascii($str);

    return $isAscii;
  }
2689
2690
  /**
2691
   * alias for "UTF8::is_base64()"
2692
   *
2693
   * @see UTF8::is_base64()
2694
   *
2695
   * @param string $str
2696
   *
2697
   * @return bool
2698
   *
2699
   * @deprecated <p>use "UTF8::is_base64()"</p>
2700
   */
2701
  public static function isBase64($str)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_base64().
    $isBase64 = self::is_base64($str);

    return $isBase64;
  }
2705
2706
  /**
2707
   * alias for "UTF8::is_binary()"
2708
   *
2709
   * @see UTF8::is_binary()
2710
   *
2711
   * @param string $str
2712
   *
2713
   * @return bool
2714
   *
2715
   * @deprecated <p>use "UTF8::is_binary()"</p>
2716
   */
2717
  public static function isBinary($str)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_binary().
    $isBinary = self::is_binary($str);

    return $isBinary;
  }
2721
2722
  /**
2723
   * alias for "UTF8::is_bom()"
2724
   *
2725
   * @see UTF8::is_bom()
2726
   *
2727
   * @param string $utf8_chr
2728
   *
2729
   * @return boolean
2730
   *
2731
   * @deprecated <p>use "UTF8::is_bom()"</p>
2732
   */
2733
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_bom().
    $isBom = self::is_bom($utf8_chr);

    return $isBom;
  }
2737
2738
  /**
2739
   * alias for "UTF8::is_html()"
2740
   *
2741
   * @see UTF8::is_html()
2742
   *
2743
   * @param string $str
2744
   *
2745
   * @return boolean
2746
   *
2747
   * @deprecated <p>use "UTF8::is_html()"</p>
2748
   */
2749
  public static function isHtml($str)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_html().
    $isHtml = self::is_html($str);

    return $isHtml;
  }
2753
2754
  /**
2755
   * alias for "UTF8::is_json()"
2756
   *
2757
   * @see UTF8::is_json()
2758
   *
2759
   * @param string $str
2760
   *
2761
   * @return bool
2762
   *
2763
   * @deprecated <p>use "UTF8::is_json()"</p>
2764
   */
2765
  public static function isJson($str)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_json().
    $isJson = self::is_json($str);

    return $isJson;
  }
2769
2770
  /**
2771
   * alias for "UTF8::is_utf16()"
2772
   *
2773
   * @see UTF8::is_utf16()
2774
   *
2775
   * @param string $str
2776
   *
2777
   * @return int|false false if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2778
   *
2779
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2780
   */
2781
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_utf16().
    $utf16Type = self::is_utf16($str);

    return $utf16Type;
  }
2785
2786
  /**
2787
   * alias for "UTF8::is_utf32()"
2788
   *
2789
   * @see UTF8::is_utf32()
2790
   *
2791
   * @param string $str
2792
   *
2793
   * @return int|false false if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2794
   *
2795
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2796
   */
2797
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: forwards the argument untouched to is_utf32().
    $utf32Type = self::is_utf32($str);

    return $utf32Type;
  }
2801
2802
  /**
2803
   * alias for "UTF8::is_utf8()"
2804
   *
2805
   * @see UTF8::is_utf8()
2806
   *
2807
   * @param string $str
2808
   * @param bool   $strict
2809
   *
2810
   * @return bool
2811
   *
2812
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2813
   */
2814
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias: forwards both arguments untouched to is_utf8().
    $isUtf8 = self::is_utf8($str, $strict);

    return $isUtf8;
  }
2818
2819
  /**
2820
   * Checks if a string is 7 bit ASCII.
2821
   *
2822
   * @param string $str <p>The string to check.</p>
2823
   *
2824
   * @return bool <p>
2825
   *              <strong>true</strong> if it is ASCII<br>
2826
   *              <strong>false</strong> otherwise
2827
   *              </p>
2828
   */
2829 54
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string is treated as ASCII.
    if ($str === '') {
      return true;
    }

    // Accepted bytes: \x09, \x0A, \x0D, \x10, \x13 and the printable range
    // \x20-\x7E. A single byte outside that set makes the string "not ASCII".
    $hasForeignByte = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForeignByte;
  }
2839
2840
  /**
2841
   * Returns true if the string is base64 encoded, false otherwise.
2842
   *
2843
   * @param string $str <p>The input string.</p>
2844
   *
2845
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2846
   */
2847 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never reported as base64.
    if (!isset($str[0])) {
      return false;
    }

    // Strict mode: base64_decode() returns false when the input contains
    // characters from outside the base64 alphabet.
    $decoded = base64_decode($str, true);

    // Compare against false explicitly: a decoded payload such as "0"
    // (from "MA==") is falsy in PHP and must not be mistaken for a failed
    // decode. The re-encode round trip additionally rejects input that is
    // not in the form base64_encode() produces.
    return $decoded !== false && base64_encode($decoded) === $str;
  }
2862
2863
  /**
2864
   * Check if the input is binary (heuristic check; this looks like a hack).
2865
   *
2866
   * @param mixed $input
2867
   *
2868
   * @return bool
2869
   */
2870 17
  public static function is_binary($input)
  {
    $input = (string)$input;

    // Empty input is never binary.
    if (!isset($input[0])) {
      return false;
    }

    // A string made up only of "0" and "1" is treated as a bit-string.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30 % NUL
    // bytes" test was subsumed by this check (both counted the same byte), so
    // a single scan gives exactly the same result.
    return strpos($input, "\x00") !== false;
  }
2893
2894
  /**
2895
   * Check if the file is binary.
2896
   *
2897
   * @param string $file
2898
   *
2899
   * @return boolean
2900
   */
2901 1
  public static function is_binary_file($file)
  {
    // Fallback for every failure path: an empty block, which is_binary() reports as "not binary".
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false; reading from / closing a
      // non-resource must be avoided (it is a TypeError on PHP 8).
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $block = fread($fp, 512);
        fclose($fp);
        $fp = false;
      }
    } catch (\Exception $e) {
      // Best effort: e.g. an error handler that turns warnings into exceptions.
      $block = '';

      // Do not leak the handle when reading failed after a successful open.
      if (is_resource($fp)) {
        fclose($fp);
      }
    }

    return self::is_binary($block);
  }
2913
2914
  /**
2915
   * Checks if the given string is equal to any "Byte Order Mark".
2916
   *
2917
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2918
   *
2919
   * @param string $str <p>The input string.</p>
2920
   *
2921
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2922
   */
2923 1
  public static function is_bom($str)
  {
    // The keys of self::$BOM are the candidates; the input has to be
    // identical (type and value) to one of them.
    $knownBoms = array_keys(self::$BOM);

    return in_array($str, $knownBoms, true);
  }
2933
2934
  /**
2935
   * Check if the string contains any html-tags <lall>.
2936
   *
2937
   * @param string $str <p>The input string.</p>
2938
   *
2939
   * @return boolean
2940
   */
2941 1
  public static function is_html($str)
  {
    $str = (string)$str;

    // An empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // One opening, closing or self-closing tag (with optional attributes) is enough.
    $tagFound = preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $tagFound === 1;
  }
2960
2961
  /**
2962
   * Try to check if "$str" is a JSON string.
2963
   *
2964
   * @param string $str <p>The input string.</p>
2965
   *
2966
   * @return bool
2967
   */
2968 1
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is never JSON.
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects / arrays qualify; any other decode result is rejected.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // ... and the decoder must not have flagged an error.
    return json_last_error() === JSON_ERROR_NONE;
  }
2992
2993
  /**
2994
   * Check if the string is UTF-16.
2995
   *
2996
   * @param string $str <p>The input string.</p>
2997
   *
2998
   * @return int|false <p>
2999
   *                   <strong>false</strong> if it is not UTF-16,<br>
3000
   *                   <strong>1</strong> for UTF-16LE,<br>
3001
   *                   <strong>2</strong> for UTF-16BE.
3002
   *                   </p>
3003
   */
3004 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags as binary is examined at all.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One score per byte order, filled in the order LE, BE.
    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the decoded text.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Every key of count_chars($roundTrip) that is strictly found among the
      // values of count_chars($str) adds one point.
      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $sourceChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-16".
    if ($scores['UTF-16BE'] === $scores['UTF-16LE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3052
3053
  /**
3054
   * Check if the string is UTF-32.
3055
   *
3056
   * @param string $str
3057
   *
3058
   * @return int|false <p>
3059
   *                   <strong>false</strong> if it is not UTF-32,<br>
3060
   *                   <strong>1</strong> for UTF-32LE,<br>
3061
   *                   <strong>2</strong> for UTF-32BE.
3062
   *                   </p>
3063
   */
3064 2 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags as binary is examined at all.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One score per byte order, filled in the order LE, BE.
    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the decoded text.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Every key of count_chars($roundTrip) that is strictly found among the
      // values of count_chars($str) adds one point.
      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $sourceChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    // A tie (including 0:0) means "not UTF-32".
    if ($scores['UTF-32BE'] === $scores['UTF-32LE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3112
3113
  /**
3114
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3115
   *
3116
   * @see    http://hsivonen.iki.fi/php-utf8/
3117
   *
3118
   * @param string $str    <p>The string to be checked.</p>
3119
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3120
   *
3121
   * @return bool
3122
   */
3123 60
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // Nothing to validate: the empty string is accepted.
    if (!isset($str[0])) {
      return true;
    }

    // In strict mode, input that is detected as UTF-16 or UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut relies on the PCRE "u" modifier, yet it only
    // runs when pcre_utf8_support() does NOT return true - the condition looks
    // inverted; confirm against pcre_utf8_support() before changing it.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // The byte length is needed, even when mbstring overloads strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = \strlen($str);
    }

    // Lazy-load the lookup table that maps a byte to its ordinal value.
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = self::$ORD[$str[$i]];

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet
      // sequence.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence: a non-continuation octet showed up
        // while more continuation octets were expected.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        /*
         * End of the multi-octet sequence: mUcs4 now contains the final
         * Unicode code point. Check for illegal sequences and code points.
         */
        if (
            // From Unicode 3.1, non-shortest form is illegal.
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // Re-initialize the UTF8 cache for the next character.
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3268
3269
  /**
3270
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3271
   * Decodes a JSON string
3272
   *
3273
   * @link http://php.net/manual/en/function.json-decode.php
3274
   *
3275
   * @param string $json    <p>
3276
   *                        The <i>json</i> string being decoded.
3277
   *                        </p>
3278
   *                        <p>
3279
   *                        This function only works with UTF-8 encoded strings.
3280
   *                        </p>
3281
   *                        <p>PHP implements a superset of
3282
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3283
   *                        only supports these values when they are nested inside an array or an object.
3284
   *                        </p>
3285
   * @param bool   $assoc   [optional] <p>
3286
   *                        When <b>TRUE</b>, returned objects will be converted into
3287
   *                        associative arrays.
3288
   *                        </p>
3289
   * @param int    $depth   [optional] <p>
3290
   *                        User specified recursion depth.
3291
   *                        </p>
3292
   * @param int    $options [optional] <p>
3293
   *                        Bitmask of JSON decode options. Currently only
3294
   *                        <b>JSON_BIGINT_AS_STRING</b>
3295
   *                        is supported (default is to cast large integers as floats)
3296
   *                        </p>
3297
   *
3298
   * @return mixed the value encoded in <i>json</i> in appropriate
3299
   * PHP type. Values true, false and
3300
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3301
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3302
   * <i>json</i> cannot be decoded or if the encoded
3303
   * data is deeper than the recursion limit.
3304
   */
3305 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through filter() and forced to a string first.
    $filtered = (string)self::filter($json);

    // "$options" is only handed to the native function when Bootup::is_php('5.4') reports true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3317
3318
  /**
3319
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3320
   * Returns the JSON representation of a value.
3321
   *
3322
   * @link http://php.net/manual/en/function.json-encode.php
3323
   *
3324
   * @param mixed $value   <p>
3325
   *                       The <i>value</i> being encoded. Can be any type except
3326
   *                       a resource.
3327
   *                       </p>
3328
   *                       <p>
3329
   *                       All string data must be UTF-8 encoded.
3330
   *                       </p>
3331
   *                       <p>PHP implements a superset of
3332
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3333
   *                       only supports these values when they are nested inside an array or an object.
3334
   *                       </p>
3335
   * @param int   $options [optional] <p>
3336
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3337
   *                       <b>JSON_HEX_TAG</b>,
3338
   *                       <b>JSON_HEX_AMP</b>,
3339
   *                       <b>JSON_HEX_APOS</b>,
3340
   *                       <b>JSON_NUMERIC_CHECK</b>,
3341
   *                       <b>JSON_PRETTY_PRINT</b>,
3342
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3343
   *                       <b>JSON_FORCE_OBJECT</b>,
3344
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3345
   *                       constants is described on
3346
   *                       the JSON constants page.
3347
   *                       </p>
3348
   * @param int   $depth   [optional] <p>
3349
   *                       Set the maximum depth. Must be greater than zero.
3350
   *                       </p>
3351
   *
3352
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3353
   */
3354 2 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is passed through filter() before it is encoded.
    $filtered = self::filter($value);

    // "$depth" is only handed to the native function when Bootup::is_php('5.5') reports true.
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3366
3367
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character ("false" from self::substr() is treated as empty)
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // the first character on its own
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($firstChar, $encoding, $cleanUtf8) . ($rest === false ? '' : $rest);
  }
3391
3392
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowercased = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowercased;
  }
3407
3408
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via UTF8::str_to_words(), each non-empty segment that is not listed in
   * $exceptions is passed to UTF8::lcfirst(), and the segments are glued back together.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "0" is falsy in PHP but still valid input, so it must not be treated as an empty string.
    if (!$str && $str !== '0' && $str !== 0) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Skip only truly empty segments: a plain "!$word" check would also drop the segment "0"
      // and thereby remove characters from the result.
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3457
3458
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped. If omitted (or empty), separator (\pZ)
   *                      and control / other (\pC) characters are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // No usable char list: INF (the default) or an empty value. "0" is falsy in PHP but is a
    // legitimate character to strip, so it is explicitly excluded from the "empty" check.
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3481
3482
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if the data
   *                contains no character at all.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // The native max() raises a warning (PHP < 8) or throws a ValueError (PHP >= 8) for an
    // empty array, so input without any character is answered here.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3497
3498
  /**
   * Determines the largest number of bytes used by a single
   * UTF-8 encoded character of the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if there are none.</p>
   */
  public static function max_chr_width($str)
  {
    // per-character sizes as reported by self::chr_size_list()
    $byteLengths = self::chr_size_list($str);

    return count($byteLengths) > 0 ? (int)max($byteLengths) : 0;
  }
3515
3516
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the mbstring internal encoding is set to "UTF-8".
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') === false) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3531
3532 1
  /**
   * Checks whether the "mbstring.func_overload" ini setting has the MB_OVERLOAD_STRING bit set.
   *
   * @return bool <p><strong>false</strong> if the constant MB_OVERLOAD_STRING is not defined
   *              or the bit is not set, <strong>true</strong> otherwise.</p>
   */
  private static function mbstring_overloaded()
  {
    if (defined('MB_OVERLOAD_STRING') === false) {
      return false;
    }

    return (bool)(ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
  }
3544
3545
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string if the data
   *                contains no character at all.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // The native min() raises a warning (PHP < 8) or throws a ValueError (PHP >= 8) for an
    // empty array, so input without any character is answered here.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3560
3561
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3577
3578
  /**
   * Normalize the encoding-"name" input.
   *
   * The name is upper-cased, reduced to its letters and digits and looked up in a table of known
   * aliases. Unknown names are returned upper-cased. Results are cached per input name.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8 (returned as-is if $encoding is empty)</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    if (!$encoding) {
      return $fallback;
    }

    // fast paths: already normalized names
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // Lookup key: letters and digits only. Whitespace is removed as well, otherwise names like
    // "ISO 8859-1" or "UTF 8" would never match the table below.
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = array(
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // cache under the name as it was passed in, so the next call hits the cache directly
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3684
3685
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists, built once from self::$UTF8_MSWORD
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$UTF8_MSWORD);
      $REPLACE_CACHE = array_values(self::$UTF8_MSWORD);
    }

    return \str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
  }
3710
3711
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a plain space.
   * Unless $keepBidiUnicodeControls is set, the entries of self::$BIDI_UNI_CODE_CONTROLS_TABLE
   * are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // Key the cache on the exact condition that alters the table below. "(int)$keepNonBreakingSpace"
    // would let a truthy non-boolean (e.g. 1) store the unmodified table under the key that is
    // later used for "true", so that "true" would no longer keep non-breaking spaces.
    $cacheKey = ($keepNonBreakingSpace === true) ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $WHITESPACE_CACHE[$cacheKey] = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($WHITESPACE_CACHE[$cacheKey]['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($WHITESPACE_CACHE[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3756
3757
  /**
   * Removes every run of whitespace characters (regex class "[[:space:]]" in UTF-8 mode)
   * from the string, e.g. spaces, tabs and newlines.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
3776
3777
  /**
   * Format a number with grouped thousands; the separators may consist of more than one byte.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // As soon as one separator is longer than a single byte, format with the plain ASCII
    // separators and swap them afterwards; this does not depend on the PHP version.
    // strtr() replaces both separators in a single pass, so a "," inside $dec_point is not
    // replaced a second time (which a sequential str_replace() would do).
    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3815
3816
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // results are memoized per "input . encoding"
    static $CHAR_CACHE = array();

    $encoding = (string)$encoding;

    // the cache key is built from the input as it was passed in, not from the converted one
    $original = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // still not UTF-8 after normalizing the name: convert the input to UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $original . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // prefer IntlChar when it is flagged as available and yields a non-empty result
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $CHAR_CACHE[$cacheKey] = $intlCode;
      }
    }

    // manual decoding of at most the first four bytes (unpack() returns a 1-based array)
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four byte sequence
      $result = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three byte sequence
      $result = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two byte sequence
      $result = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing usable): the first byte value itself
      $result = $lead;
    }

    return $CHAR_CACHE[$cacheKey] = $result;
  }
3882
3883
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never (re-)places variables in the current scope;
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or if $result is empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3911
3912
  /**
   * Checks if the "/u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier; errors are silenced and count as "not supported"
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3922
3923
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: decimal digits are used as-is, hexadecimal digits
   * go through UTF8::hex_to_int(), anything else through UTF8::ord().
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is empty
   *               or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start and end with the same rules; stop at the first unusable boundary
    $codePoints = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($codePoints[0], $codePoints[1]));
  }
3969
3970
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal html entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode once, or - with $multi_decode - until a pass no longer changes the string
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
4020
4021
  /**
   * Alias for "UTF8::remove_bom()": the argument is forwarded unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4036
4037
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM bytes => byte length) is checked in turn against the start
   * of the string and cut off when it matches.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // byte-wise comparison: only a BOM at offset 0 counts
      if (0 !== self::strpos($str, $bomString, 0, '8BIT')) {
        continue;
      }

      $withoutBom = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($withoutBom === false) ? '' : (string)$withoutBom;
    }

    return $str;
  }
4064
4065
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a search string is collapsed into one occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // The search string is used literally on both sides: preg_quote() for the pattern and
        // escaped "\" / "$" for the replacement, otherwise e.g. "$0" or "$2" inside $item would
        // be interpreted as a back-reference by preg_replace().
        $str = preg_replace(
            '/(?:' . preg_quote($item, '/') . ')+/',
            addcslashes($item, '\\$'),
            $str
        );
      }
    }

    return $str;
  }
4088
4089
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str
   * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form ("%00" etc.) of these characters.</p>
   * @param string $replacement [optional] <p>The replacement for every removed character (run).</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-escape may be upper- or lowercase, hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is left to replace: removing one sequence can form a new one
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4122
4123
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    [optional] <p>The replacement (an empty string removes the chars).</p>
   * @param bool   $processInvalidUtf8 [optional] <p>Also replace invalid UTF-8 byte sequences.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // mb_substitute_character() only accepts a code point or one of the keywords
      // "none" / "long" / "entity", so an arbitrary replacement string must not be handed to it.
      // Invalid byte sequences are therefore turned into U+FFFD first; the str_replace() below
      // then swaps U+FFFD for the requested replacement (or removes it if that is empty).
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);
    }

    return str_replace(
        array(
            "\xEF\xBF\xBD",
            '�',
        ),
        array(
            $replacementChar,
            $replacementChar,
        ),
        $str
    );
  }
4168
4169
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped. If omitted (or empty), separator (\pZ)
   *                      and control / other (\pC) characters are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // No usable char list: INF (the default) or an empty value. "0" is falsy in PHP but is a
    // legitimate character to strip, so it is explicitly excluded from the "empty" check.
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
4192
4193
  /**
   * Builds a regex fragment that matches any single character of the given string.
   *
   * The characters are collected in a bracket expression; pieces that UTF8::strlen() does not
   * report as exactly one character become separate alternatives. Results are cached per
   * "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>"[...]" or "(?:[...]|...)"</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 holds the bracket expression content, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a dash goes to the front of the bracket expression
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // at most two bytes: quote for use inside the pattern
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // one character of three or more bytes: appended as-is
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4241
4242
  /**
   * WARNING: Prints (echo) the detected native UTF-8 support (libs), e.g. for debugging.
   *
   * The output is a "<pre>" block with one "key - value" line per support entry.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $report = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $report .= $feature . ' - ' . print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $report . '</pre>';
  }
4257
4258
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII chars unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encodings need to be normalized
    $usedEncoding = $encoding;
    if ($usedEncoding !== 'UTF-8') {
      $usedEncoding = self::normalize_encoding($usedEncoding, 'UTF-8');
    }

    return '&#' . self::ord($char, $usedEncoding) . ';';
  }
4289
4290
  /**
   * Convert a string to an array of Unicode characters.
   *
   * Uses PCRE ("/./us") when UTF-8 support is available, otherwise a byte-wise decoder
   * that collects well-formed 1- to 4-byte sequences and skips everything else.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: decode the byte sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // always count bytes, even if strlen() is overloaded by mbstring
      $byteCount = self::$SUPPORT['mbstring_func_overload'] === true
          ? \mb_strlen($str, '8BIT')
          : strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx: single byte (ASCII)
          $chars[] = $lead;

        } elseif (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {

          // 110xxxxx: two byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$i + 1];
            $i++;
          }

        } elseif (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {

          // 1110xxxx: three byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2];
            $i += 2;
          }

        } elseif (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {

          // 11110xxx: four byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];
            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue the single characters together, $length characters per element
      $joined = array();
      foreach (array_chunk($chars, $length) as $chunk) {
        $joined[] = implode('', $chunk);
      }

      return $joined;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4413
4414
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" and finally an "iconv()" round trip per known encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // each check is evaluated once and the result reused for the LE / BE decision
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string to itself leaves every byte untouched.
    // The strings are compared directly, hashing both sides adds nothing to the comparison.

    $strAsString = (string)$str;
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $strAsString) === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
4514
4515
  /**
   * Check if the string ends with the given substring (byte-wise, case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    $tail = substr($haystack, -strlen($needle));

    return $tail === $needle;
  }
4538
4539
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * The end of the haystack is cut off byte-wise (length of the needle in bytes)
   * and then compared via UTF8::strcasecmp().
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    $tail = substr($haystack, -strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4562
4563
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       The value(s) being searched for. Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search value never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s). They are inserted literally,
   *                       "$1" or "\1" are not treated as back-references.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive UTF-8 pattern per search value
    $patterns = array();
    foreach ((array)$search as $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // a pattern that can never match, so an empty needle replaces nothing
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() interprets "\" and "$" in the replacement, str_ireplace() must not
    $escape = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $replaceTmp) {
        $replacements[] = strtr((string)$replaceTmp, $escape);
      }
    } else {
      $replacements = strtr((string)$replace, $escape);
    }

    $subject = preg_replace($patterns, $replacements, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
4606
4607
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle has to be found at the very first position
    return self::stripos($haystack, $needle) === 0;
  }
4630
4631
  /**
   * Limit the number of characters in a string without cutting the last word in two.
   *
   * If the string is longer than $length, it is cut and the (possibly incomplete) last word
   * is dropped before $strAddOn is appended. If no complete word is left,
   * the string is cut to ($length - 1) characters instead.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max number of characters before the add-on.</p>
   * @param string $strAddOn <p>Appended whenever the string was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough, nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls right behind a word: cut in front of the space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4671
4672
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if $pad_length is not an integer, is not greater than zero,
   * is smaller than the length of the input, or if $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // an empty pad string cannot fill anything (and would lead to a division by zero)
      if ($ps_length < 1) {
        return $str;
      }

      $diff = $pad_length - $str_length;

      // number of padding characters in front of / behind the string
      switch ($pad_type) {
        case STR_PAD_LEFT:
          $preLength = $diff;
          $postLength = 0;
          break;

        case STR_PAD_BOTH:
          // like the native str_pad(): the right side gets the extra character
          $preLength = (int)floor($diff / 2);
          $postLength = $diff - $preLength;
          break;

        case STR_PAD_RIGHT:
        default:
          $preLength = 0;
          $postLength = $diff;
      }

      $pre = '';
      if ($preLength > 0) {
        $pre = str_repeat($pad_string, (int)ceil($preLength / $ps_length));
        $pre = (string)self::substr($pre, 0, $preLength);
      }

      $post = '';
      if ($postLength > 0) {
        $post = str_repeat($pad_string, (int)ceil($postLength / $ps_length));
        $post = (string)self::substr($post, 0, $postLength);
      }

      return $pre . $str . $post;
    }

    return $str;
  }
4726
4727
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4751
4752
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation, $count is filled by the native function
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4785
4786
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first occurrence.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject itself if the search term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
4805
4806
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via UTF8::split(), so multi-byte characters stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $characters = self::split($str);

    // shuffle() works in place on the list of characters
    shuffle($characters);

    return implode('', $characters);
  }
4821
4822
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4847
4848
  /**
   * Split a string into an array of grapheme clusters.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>
   *                    Number of grapheme clusters per array element.
   *                    Values below 1 are handed over to the native str_split().
   *                    </p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // start a new element at every multiple of $len and append to it otherwise
    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len) {
        $chunks[$chunkIndex] .= $cluster;
      } else {
        $chunks[++$chunkIndex] = $cluster;
      }
    }

    return $chunks;
  }
4892
4893
  /**
   * Check if the string starts with the given substring (byte-wise, case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle has to be found at the very first position
    return strpos($haystack, $needle) === 0;
  }
4916
4917
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number, therefore leading zero bits are
   * not part of the result (e.g. "a" => "1100001"). An empty string results in "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1".</p>
   */
  public static function str_to_binary($str)
  {
    static $HEX_TO_BITS = array(
        '0' => '0000',
        '1' => '0001',
        '2' => '0010',
        '3' => '0011',
        '4' => '0100',
        '5' => '0101',
        '6' => '0110',
        '7' => '0111',
        '8' => '1000',
        '9' => '1001',
        'a' => '1010',
        'b' => '1011',
        'c' => '1100',
        'd' => '1101',
        'e' => '1110',
        'f' => '1111',
    );

    $str = (string)$str;

    $value = unpack('H*', $str);

    // Translate digit by digit: base_convert() works with native numbers and
    // silently loses precision as soon as the string is longer than a few bytes.
    $bits = ltrim(strtr($value[1], $HEX_TO_BITS), '0');

    return $bits === '' ? '0' : $bits;
  }
4932
4933
  /**
   * Convert a string into an array of words.
   *
   * Without any filter the result alternates between the text around the words (even keys)
   * and the words themselves (odd keys).
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove values that are empty after trim().</p>
   * @param null|int $removeShortValues <p>Remove values with a length less than or equal to this number.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested: hand back the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isEmpty = $removeEmptyValues === true && trim($part) === '';
      if ($isEmpty) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4994
4995
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
5010
5011
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // even keys hold the text between the words, odd keys hold the words
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $partCount; $i += 2) {
        $words[$offset] = $parts[$i];
        // skip the word itself and the text that follows it
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
5054
5055
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
   *       case-folded via UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5073
5074
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5091
5092
  /**
   * Case-sensitive string comparison.
   *
   * Both strings are normalized to NFD first, so canonically equivalent strings are equal.
   * A string that cannot be normalized (e.g. invalid UTF-8) is compared as it is.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    $str1 = (string)$str1;
    $str2 = (string)$str2;

    if ($str1 === $str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    // normalize() fails with "false" on invalid input: without the fallback
    // two different invalid strings would be reported as equal
    return strcmp(
        is_string($normalized1) ? $normalized1 : $str1,
        is_string($normalized2) ? $normalized2 : $str2
    );
  }
5112
5113
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   <p>Start position, only the part from here on is examined.</p>
   * @param int    $length   <p>Length of the part to examine.</p>
   *
   * @return int|null <p>
   *                  The length (in characters) of the segment, or <strong>null</strong> if the
   *                  char-list or the examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first char from the list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    return self::strlen($str);
  }
5149
5150
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5167
5168
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    // convert every code point via UTF8::chr() and glue the characters together
    $characters = array();
    foreach ($array as $codePoint) {
      $characters[] = self::chr($codePoint);
    }

    return implode('', $characters);
  }
5190
5191
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // the BOM strings are the keys of the list, the values are not needed here
    foreach (array_keys(self::$BOM) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5208
5209
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped; handed to the native strip_tags().
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string (an empty string for empty input).</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $input = (string)$str;

    // nothing to strip
    if ($input === '') {
      return '';
    }

    return strip_tags(
        $cleanUtf8 === true ? self::clean($input) : $input,
        $allowable_tags
    );
  }
5243
5244
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * Uses "grapheme_stripos()" for UTF-8 input when the intl extension is available,
   * otherwise falls back to "mb_stripos()".
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (also for an empty haystack or needle).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $useDefaultEncoding = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $useDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;

    if ($canUseIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5304
5305
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * Strategy: "mb_stristr()" if mbstring is available, then "grapheme_stristr()" (UTF-8 + intl only),
   * then the native "stristr()" for pure ASCII input and finally a regex based fallback.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also for an empty haystack or needle).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may have emptied the needle. Compare against '' explicitly:
    // a loose "!$needle" check would also treat the valid needle "0" as empty.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
5386
5387
  /**
   * Get the string length, not the byte-length!
   *
   * Backends are tried in this order: byte length for "ASCII" / "CP850" / "8BIT",
   * mbstring, iconv, intl (UTF-8 only), native strlen() for pure ASCII input,
   * a regex based count and finally "mb_strlen()" via polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // these encodings are measured in bytes
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $nativeIsSafe = $encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false;

        return $nativeIsSafe ? strlen($str) : \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // mbstring first, iconv second; "iconv without mbstring" therefore always ends up in "iconv_strlen()"
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;

    if ($canUseIntl) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the single character matches
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);

    // no match at all -> fallback to "mb_"-function via polyfill
    return $charCount !== 0 ? $charCount : \mb_strlen($str, $encoding);
  }
5494
5495
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared via UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5511
5512
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * Identical strings short-circuit to 0; otherwise both sides are passed through
   * UTF8::strtonatfold() and compared with the native strnatcmp().
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5530
5531
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared via UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5548
5549
  /**
   * String comparison of the first n characters.
   *
   * Each string is cut to its first $len characters with UTF8::substr(),
   * then the two prefixes are compared via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // the string cast turns a "false" from UTF8::substr() into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
5569
5570
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also for an empty haystack or char_list).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // UTF8::rxClass() supplies the pattern fragment built from the character list
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    $start = strpos($haystack, $found[0]);

    return substr($haystack, $start);
  }
5595
5596
  /**
   * Find position of first occurrence of string in a string.
   *
   * Backends are tried in this order: native strpos() for "CP850", iconv (non UTF-8 without mbstring),
   * mbstring, intl (UTF-8 only), iconv, native strpos() for pure ASCII input and finally a
   * vanilla php fallback that works on the part of the haystack behind $offset.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found it returns false (also for an empty haystack or needle).
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a code point is converted
    // into its character *before* the string cast below hides the original type
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither of the two extensions which can handle other encodings is available
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end: translate it into an absolute start position,
      // otherwise the result would be relative to the tail instead of the full haystack
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle within the remaining part of the haystack
    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // skipped characters + characters in front of the needle
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5741
5742
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around "mb_strrchr()" with optional encoding normalization and UTF-8 cleaning.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use; anything other than "UTF-8"
   *                              is passed through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5781
5782
  /**
   * Reverses characters order in the string.
   *
   * The string is split into characters with UTF8::split(), so multi-byte characters stay intact.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5799
5800
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around "mb_strrichr()" with optional encoding normalization and UTF-8 cleaning.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name to use; anything other than "UTF-8"
   *                               is passed through UTF8::normalize_encoding() first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5838
5839
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Uses "mb_strripos()" if mbstring is available, then "grapheme_strripos()" (UTF-8 + intl only)
   * and finally UTF8::strrpos() on the upper-cased haystack and needle.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found, it returns false (also for an empty haystack or needle).
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // must run before the string cast below, otherwise the int is no longer detectable
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $useDefaultEncoding = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $useDefaultEncoding ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;

    if ($canUseIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased versions case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5919
5920
  /**
   * Locate the last occurrence of a needle inside a haystack.
   *
   * <p>Backends are tried in this order: "mb_strrpos()" when mbstring is flagged as available,
   * "grapheme_strrpos()" (UTF-8 only, needs intl and PHP >= 5.4), and finally the byte based
   * "strrpos()" whose result is measured with UTF8::strlen().</p>
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string that is searched.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr()
   *                              before the search.</p>
   * @param int        $offset    [optional] <p>Start offset; it is cast to int (null becomes 0) and handed to the
   *                              selected backend. In the vanilla fallback a positive value cuts the haystack
   *                              from the left, a negative value cuts it from the right.</p>
   * @param string     $encoding  [optional] <p>Set the charset. Boolean values are accepted for old callers and
   *                              are treated as "UTF-8".</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The position of the last occurrence of needle in haystack,<br>or false if haystack or
   *                   needle is empty or the needle was not found.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is interpreted as a code point
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: "$encoding === true" is only a fallback for old versions (former position of "$cleanUtf8")
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $treatAsUtf8 = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $treatAsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // INFO: the "grapheme_*" functions are only used for UTF-8 input
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: cut the haystack according to the offset first

    $slice = null;
    if ($offset > 0) {
      $slice = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $slice = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($slice !== null) {
      $haystack = ($slice === false) ? '' : (string)$slice;
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // measure the part in front of the match with UTF8::strlen() instead of returning the byte position
    return $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
6022
6023
  /**
   * Length of the leading run of characters in a string that are all part of a given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>If set (or if $length is set), only the part returned by
   *                       UTF8::substr($str, $offset, $length) is examined.</p>
   * @param int    $length [optional]
   *
   * @return int <p>0 if the (sliced) string or the mask is empty, or if the string does not start with a char
   *             from the mask.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the longest prefix that consists only of chars from the mask
    $matches = array();
    if (preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
6051
6052
  /**
   * Return the part of haystack starting at the first occurrence of needle (or the part in front of it).
   *
   * <p>Backends are tried in this order: "mb_strstr()" when mbstring is flagged as available,
   * "grapheme_strstr()" (UTF-8 only, needs intl and PHP >= 5.4), and finally a regex based lookup.</p>
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence of the
   *                               needle is returned (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if haystack or needle is empty or the needle
   *                      is not found.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions in the backends
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: the "grapheme_*" functions are only used for UTF-8 input
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: group 1 lazily captures everything in front of the first occurrence of the needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];
    if ($before_needle) {
      return $prefix;
    }

    return self::substr($haystack, self::strlen($prefix));
  }
6124
6125
  /**
   * Unicode transformation for case-less matching.
   *
   * <p>The string is run through the static UTF8::$COMMON_CASE_FOLD table, optionally through the
   * "caseFolding_full" data set, optionally cleaned, and finally lowercased via UTF8::strtolower().</p>
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string (applied after the folding
   *                           replacements).</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // the lookup tables are built / loaded once per request and kept in static locals
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldData = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldData === null) {
        $fullFoldData = self::getData('caseFolding_full');
      }

      // index 0 is used as search list, index 1 as replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldData[0], $fullFoldData[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6175
6176
  /**
   * Make a string lowercase.
   *
   * <p>Without $lang the work is done by "mb_strtolower()". With $lang, and intl + PHP >= 5.4 available, the
   * string is passed to "transliterator_transliterate()" using the id "<lang>-Lower" (or "Any-Lower" if that id
   * is not listed); $encoding is not passed on in that case.</p>
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $canTransliterate = (self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true);
      if ($canTransliterate) {

        $transliteratorId = $lang . '-Lower';
        $knownIds = self::$SUPPORT['intl__transliterator_list_ids'];

        // unknown language: warn and use the generic lowercase transliterator
        if (!in_array($transliteratorId, $knownIds, true)) {
          trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

          $transliteratorId = 'Any-Lower';
        }

        return transliterator_transliterate($transliteratorId, $str);
      }

      // the "lang"-parameter cannot be honoured here, so warn and continue with the generic conversion
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtolower($str, $encoding);
  }
6233
6234
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * <p>The string is decomposed via "Normalizer::normalize()" (form NFD) and every run of
   * non-spacing marks (\p{Mn}) is removed afterwards.</p>
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6246
6247
  /**
   * Make a string uppercase.
   *
   * <p>Without $lang the work is done by "mb_strtoupper()". With $lang, and intl + PHP >= 5.4 available, the
   * string is passed to "transliterator_transliterate()" using the id "<lang>-Upper" (or "Any-Upper" if that id
   * is not listed); $encoding is not passed on in that case.</p>
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';

        // unknown language: warn and use the generic uppercase transliterator
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the "lang"-parameter cannot be honoured here, so warn and continue with the generic conversion
      // (the warning previously named "UTF8::strtolower()", which pointed at the wrong method)
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6303
6304
  /**
   * Translate characters or replace sub-strings.
   *
   * <p>Three call styles are handled:</p>
   * <ul>
   *   <li>strtr($str, $from, $to): the n-th element of $from is replaced by the n-th element of $to; string
   *       arguments are split into characters via UTF8::str_split(), array arguments are used as they are;
   *       surplus elements of the longer list are ignored.</li>
   *   <li>strtr($str, array $pairs): the array is passed to PHP's "strtr()" as replace-pairs.</li>
   *   <li>strtr($str, string $from): every occurrence of $from is removed from $str.</li>
   * </ul>
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // identical search and replacement: nothing would change
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // only strings are split into characters; arrays already are lists of search / replace items
      // (passing an array to UTF8::str_split() is not supported)
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      // both lists must have the same size for "array_combine()", so cut the longer one
      $pairCount = min(count($from), count($to));
      if ($pairCount === 0) {
        // no pairs to apply (and "array_combine()" rejects empty arrays in old PHP versions)
        return $str;
      }

      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    }

    // a plain string without replacement list: remove all occurrences
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6351
6352
  /**
   * Return the width of a string, as reported by "mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      $str = self::clean($str);
    }

    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($str, $charset);
  }
6376
6377
  /**
   * Changes the case of all keys in an array via UTF8::strtolower() / UTF8::strtoupper().
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                  or <strong>CASE_LOWER</strong> (default); any other value is handled like
   *                  <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // everything except CASE_LOWER (including unknown values) results in uppercased keys
    $toLower = ($case === CASE_LOWER);

    $return = array();
    foreach ($array as $key => $value) {
      $convertedKey = $toLower ? self::strtolower($key) : self::strtoupper($key);

      // keys that become equal after the conversion overwrite each other (last one wins)
      $return[$convertedKey] = $value;
    }

    return $return;
  }
6414
6415
  /**
   * Get part of a string.
   *
   * <p>Backends are tried in this order: the byte based "substr()" for "CP850" (only if mbstring function
   * overloading is flagged as off), "mb_substr()", "grapheme_substr()" (UTF-8 only, needs intl and
   * PHP >= 5.4), "iconv_substr()" (non-negative length only), the byte based "substr()" for pure ASCII input and
   * finally UTF8::split() + "array_slice()".</p>
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>An empty <i>str</i> or a <i>length</i> of 0 results
   *                      in an empty string.</p><p>If <i>offset</i> is greater than the length of
   *                      <i>str</i>, <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // empty input or an explicit length of zero: nothing to extract
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // no offset and no length: the whole string is requested
    if (!$offset && $length === null) {
      return $str;
    }

    // the string length is only needed when an offset is given
    // (a missing length without an offset was already handled above)
    $totalLength = 0;
    if ($offset) {
      $totalLength = (int)self::strlen($str, $encoding);

      // the offset lies behind the end of the string
      if ($offset > $totalLength) {
        return false;
      }
    }

    $length = ($length === null) ? $totalLength : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    $treatAsUtf8 = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $treatAsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut byte-wise, as long as "substr()" is not overloaded by mbstring
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // INFO: the "grapheme_*" functions are only used for UTF-8 input
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $chars = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($chars, $offset, $length));
  }
6537
6538
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * <p>If an offset or a length is given, $str1 is reduced to UTF8::substr($str1, $offset, $length) and $str2 is
   * cut to the length of that part before both are compared.</p>
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison; it is passed to
   *                                    UTF8::substr() together with $length.</p>
   * @param int     $length             [optional] <p>The length of the comparison.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, the comparison is done via
   *                                    UTF8::strcasecmp(), otherwise via UTF8::strcmp().</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $compareWholeStrings = ($offset === 0 && $length === null);

    if (!$compareWholeStrings) {
      $firstPart = self::substr($str1, $offset, $length);
      $str1 = ($firstPart === false) ? '' : (string)$firstPart;

      // the second string is compared over the same number of characters
      $secondPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($secondPart === false) ? '' : (string)$secondPart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6582
6583
  /**
   * Count the number of substring occurrences.
   *
   * <p>"mb_substr_count()" is used when mbstring is flagged as available, otherwise the occurrences are counted
   * with a regex.</p>
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. If only the offset is given, the length of the haystack is used.
   *                           </p>
   * @param string  $encoding  <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences,<br>or false if haystack or needle is empty, or (PHP &lt; 7.1
   *                   only) if offset and length are both non-zero and their sum is not positive.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // reduce the haystack to the requested window first
    if ($offset || $length !== null) {

      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      // output from "substr_count()" have changed in PHP 7.1
      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          ($length + $offset) <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would lead to wrong results in the backends
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match set is one occurrence
    $pattern = '/' . preg_quote($needle, '/') . '/us';
    preg_match_all($pattern, $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6672
6673
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with the
   *                needle (as decided by UTF8::str_istarts_with()) or if the needle is empty.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to remove
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // skip as many characters as the needle is long
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6705
6706
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with the
   *                needle (as decided by UTF8::str_iends_with()) or if the needle is empty.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to remove
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything except the last strlen($needle) characters
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6738
6739
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with the
   *                needle (as decided by UTF8::str_starts_with()) or if the needle is empty.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to remove
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // skip as many characters as the needle is long
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6771
6772
  /**
   * Replace text within a portion of a string (character based, not byte based).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str         <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset      <p>
   *                                     If positive, the replacing begins at that character offset into the
   *                                     string.
   *                                     <br><br>
   *                                     If negative, the replacing begins at that many characters from the end
   *                                     of the string.
   *                                     </p>
   * @param int|int[]|null  $length      [optional] <p>If positive, the number of characters to replace. If
   *                                     negative, the number of characters from the end of the string at which
   *                                     to stop replacing. If zero, the replacement is inserted at the offset.
   *                                     If omitted for a single string, everything up to the end of the string
   *                                     is replaced.</p>
   *
   * @return string|string[] <p>The resulting string; if $str is an array, an array of resulting strings.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // the replacement: one entry per input string
      $replacement = is_array($replacement) === true
          ? array_slice($replacement, 0, $num)
          : array_pad(array($replacement), $num, $replacement);

      // the offset: non-integer entries fall back to 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as $key => $value) {
          $offset[$key] = is_int($value) ? $value : 0;
        }
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length
      // NOTE(review): a missing $length becomes 0 for every element here (i.e. "insert"),
      // whereas the single-string path below defaults to the full string length - confirm this is intended.
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as $key => $value) {
          if (!isset($value)) {
            $length[$key] = 0;
          } else {
            $length[$key] = is_int($value) ? $value : $num;
          }
        }
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // handle every element with a recursive single-string call
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // single string, but several replacements: only the first one is used
    if (is_array($replacement) === true) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // pure ASCII input: character offsets equal byte offsets, so the native function is safe
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice on character level
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6873
6874
  /**
   * Strips the suffix ($needle) from the end of the string ($haystack), if it is present.
   *
   * @param string $haystack <p>The string to work on.</p>
   * @param string $needle   <p>The suffix that should be removed.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with
   *                the needle. An empty haystack always yields an empty string.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    // Work on plain strings so the emptiness checks below are reliable.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Nothing can be stripped from an empty haystack.
    if ($haystack === '') {
      return '';
    }

    // An empty needle, or a haystack that fails the "str_ends_with()" check,
    // leaves the haystack untouched.
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything in front of the matched suffix.
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $stripped = self::substr($haystack, 0, $keepLength);

    // "substr()" may report failure with false; normalise that to an empty string.
    return $stripped === false ? '' : (string)$stripped;
  }
6905
6906
  /**
   * Swaps the case of every non-whitespace character of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Run the string through "UTF8::clean()" first.</p>
   *
   * @return string <p>The string with each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // A character that is already identical to its upper-case form gets lower-cased,
    // every other character gets upper-cased.
    $swapChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapChar, $str);
  }
6949
6950
  /**
   * Deprecated alias that simply forwards to "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Handed to "UTF8::to_ascii()" as its $unknown argument.</p>
   * @param bool   $strict    [optional] <p>Handed to "UTF8::to_ascii()" as its $strict argument.</p>
   *
   * @return string <p>Whatever "UTF8::to_ascii()" returns.</p>
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6967
6968
  /**
   * Deprecated alias that simply forwards to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str <p>The input, passed through unchanged.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_iso8859()" returns.</p>
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6983
6984
  /**
   * Deprecated alias that simply forwards to "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string $str <p>The input, passed through unchanged.</p>
   *
   * @return string <p>Whatever "UTF8::to_latin1()" returns.</p>
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6999
7000
  /**
   * Deprecated alias that simply forwards to "UTF8::to_utf8()" (without html-entity decoding).
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str <p>The input, passed through unchanged.</p>
   *
   * @return string <p>Whatever "UTF8::to_utf8()" returns.</p>
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
7015
7016
  /**
   * Convert a string into ASCII.
   *
   * The string is returned as-is when it is already ASCII (before or after "UTF8::clean()").
   * Otherwise every non-ASCII character is decoded to its code point and replaced via the
   * transliteration tables loaded with "getData()"; characters without a table entry become $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the already loaded transliteration tables, keyed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: never overwrite the
        // input with that, but fall through to the table based conversion instead
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = self::$ORD[$c[0]];

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset the code point, so that a value decoded for a previous character
      // can never leak into the "!isset($ord)" check below
      $ord = null;

      $ordC1 = self::$ORD[$c[1]];

      // 2-byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = self::$ORD[$c[2]];

        // 3-byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = self::$ORD[$c[3]];

          // 4-byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = self::$ORD[$c[4]];

            // 5-byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = self::$ORD[$c[5]];

              // 6-byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF never start a sequence
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no branch above produced a code point
      if (!isset($ord)) {
        $c = $unknown;
        continue;
      }

      // the tables are split into "banks" of 256 code points each; load the bank on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // position of the code point inside its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the loop variable
    unset($c);

    return implode('', $chars);
  }
7180
7181
  /**
   * Convert a string (or every element of an array) into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are handled recursively and keep their keys; strings are handed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[] <p>An empty string for empty input.</p>
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
7210
7211
  /**
   * Alias that simply forwards to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input, passed through unchanged.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_iso8859()" returns.</p>
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7224
7225
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, the keys are kept
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // number of bytes (not characters) in the input
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';
    $i = 0;

    while ($i < $max) {
      $lead = $str[$i];

      if ($lead < "\xC0") {
        // a stray continuation byte (10xxxxxx) needs conversion, everything else is kept as it is
        $buf .= (($lead & "\xC0") === "\x80") ? self::to_utf8_convert($lead) : $lead;
        $i++;

        continue;
      }

      // how many continuation bytes would a valid UTF-8 sequence with this lead byte need?
      if ($lead <= "\xDF") {
        $trail = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $trail = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $trail = 3; // looks like 4 bytes UTF8
      } else {
        $trail = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; bytes behind the end of the string count as "\x00"
      $sequence = $lead;
      $looksValid = $trail > 0;
      for ($k = 1; $looksValid && $k <= $trail; $k++) {
        $next = $i + $k >= $max ? "\x00" : $str[$i + $k];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksValid = false;
        }
      }

      if ($looksValid) {
        // yeah, almost sure it's UTF8 already
        $buf .= $sequence;
        $i += $trail + 1;
      } else {
        // not valid UTF8 - convert the lead byte only and re-check the following bytes on their own
        $buf .= self::to_utf8_convert($lead);
        $i++;
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7342
7343
  /**
   * Converts one single byte, which is not part of a valid UTF-8 sequence, into UTF-8.
   *
   * Bytes listed in the WINDOWS-1252 special-case table are mapped via that table; every other byte
   * is encoded as a 2-byte UTF-8 sequence.
   *
   * @param string $int <p>The single byte to convert (despite its name, "UTF8::to_utf8()" passes a
   *                    one-byte string, not an integer).</p>
   *
   * @return string <p>The UTF-8 encoded replacement.</p>
   */
  private static function to_utf8_convert($int)
  {
    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $ordC1 = self::$ORD[$int];

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$ordC1])) {
      return self::$WIN1252_TO_UTF8[$ordC1];
    }

    // Lead byte: 110000xx, where xx are the two highest bits of the input byte.
    // The shift keeps the array offset an integer; "$ordC1 / 64" yields a float
    // offset (e.g. 3.125), which is deprecated as an implicit lossy conversion since PHP 8.1.
    $cc1 = self::$CHR[$ordC1 >> 6] | "\xC0";

    // Continuation byte: 10xxxxxx, carrying the six lowest bits of the input byte.
    $cc2 = ($int & "\x3F") | "\x80";

    return $cc1 . $cc2;
  }
7372
7373
  /**
   * Strip whitespace or other characters from beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the original function, if the string / chars are pure 7-Bit,
   * but the check for ASCII (7-Bit) costs more time than we would save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // explicit characters given: strip them from the left, then from the right
    // NOTE(review): a falsy $chars such as '0' is treated like "not given" - confirm this is intended.
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // default: strip separators (\pZ) and "other" characters like controls (\pC)
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7401
7402
  /**
   * Upper-cases the first character of the string and leaves the rest untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Run the string through "UTF8::clean()" first.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything behind the first character; a failed "substr()" counts as "nothing left"
    $rest = self::substr($str, 1, null, $encoding);
    if ($rest === false) {
      $rest = '';
    }

    // the first character on its own
    $first = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($first, $encoding, $cleanUtf8) . $rest;
  }
7432
7433
  /**
   * Alias that simply forwards to "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input, passed through unchanged.</p>
   * @param string  $encoding  [optional] <p>Passed through unchanged.</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through unchanged.</p>
   *
   * @return string <p>Whatever "UTF8::ucfirst()" returns.</p>
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7448
7449
  /**
   * Uppercase the first character of all words in the string.
   *
   * Pure ASCII input without exceptions / charlist is handed to PHP's native "ucwords()"; everything
   * else is split via "UTF8::str_to_words()" and upper-cased word by word with "UTF8::ucfirst()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that are kept exactly as they are.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new
   *                             word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    // only a really empty string is "nothing to do"; a loose "!$str" would also swallow the input "0"
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function is only an option if neither a charlist nor any exception text was given
    // (compared against '' on purpose: a charlist / exception of "0" must not be ignored)
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $hasExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty tokens only; the token "0" is a regular word and must stay in the output
      if ((string)$word === '') {
        continue;
      }

      if ($hasExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7517
7518
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, i.e. until the result does not change
   *                             anymore.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // turn "%uXXXX" sequences into numeric html-entities, so that the loop below can decode them
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only used from PHP 5.4 on
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    while (true) {
      $previous = $str;

      // one decoding round: to UTF-8 -> html-entities -> url-encoding -> fix simple UTF-8 issues
      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entityDecoded));

      // stop after the first round, or as soon as a round did not change anything
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return (string)$str;
  }
7568
7569
  /**
   * Returns a lookup table that maps url-encoded Windows-1252 bytes ("%20" - "%FF")
   * to the corresponding characters.
   *
   * NOTE(review): a few entries ("%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0", "%AD")
   * map to an empty string in this table; confirm against the repository that no
   * invisible characters were intended for them.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>The url-encoded byte (upper-case hex) as key and the decoded character as value.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // Windows-1252 byte 0x80 is the EURO SIGN (it was mapped to a backtick before)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7805
7806
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through "UTF8::to_utf8()" and the class'
   * UTF-8 -> WIN1252 replacement table. Afterwards the string is walked byte by byte:
   * 2-byte sequences are collapsed into a single byte (or "?" if the value is >= 256),
   * 3- and 4-byte sequences are replaced by "?", every other byte is copied as it is.
   *
   * @param string $str           <p>The input string.</p>
   * @param bool   $keepUtf8Chars <p>
   *                              If true, the string as it was before the byte-wise decoding is
   *                              returned whenever the decoded result is not shorter than it
   *                              (both measured via "UTF8::strlen()").
   *                              </p>
   *
   * @return string
   */
  public static function utf8_decode($str, $keepUtf8Chars = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the search / replace lists are built on the first call only
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = \array_keys(self::$UTF8_TO_WIN1252);
      $replace = \array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = \str_replace($search, $replace, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // remember the current state for the "$keepUtf8Chars" comparison at the end
    $beforeDecoding = $str;

    // length in bytes, regardless of "mbstring.func_overload"
    $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : \strlen($str);

    // lazy-load the lookup tables
    if (self::$ORD === null) {
      self::$ORD = self::getData('ord');
    }

    if (self::$CHR === null) {
      self::$CHR = self::getData('chr');
    }

    $noCharFound = '?';

    // "$read" walks over the input bytes, "$write" is the position of the next output byte
    // in the same string ("$write" never gets ahead of "$read")
    /** @noinspection ForeachInvariantsInspection */
    for ($read = 0, $write = 0; $read < $byteLength; ++$read, ++$write) {
      $lead = $str[$read];

      switch ($lead & "\xF0") {
        // lead byte of a 2-byte sequence
        case "\xC0":
        case "\xD0":
          $high = self::$ORD[$lead & "\x1F"];
          $low = self::$ORD[$str[++$read] & "\x3F"];
          $codePoint = ($high << 6) | $low;
          $str[$write] = $codePoint < 256 ? self::$CHR[$codePoint] : $noCharFound;
          break;

        // lead byte of a 4-byte sequence: skip one byte more than for a 3-byte sequence
        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          ++$read;
          // no break, the fall-through is intentional
        // lead byte of a 3-byte sequence
        case "\xE0":
          $str[$write] = $noCharFound;
          $read += 2;
          break;

        default:
          $str[$write] = $lead;
      }
    }

    // cut off the bytes behind the last written position
    $decoded = (string)self::substr($str, 0, $write, '8BIT');

    if (
        $keepUtf8Chars === true
        &&
        self::strlen($decoded) >= self::strlen($beforeDecoding)
    ) {
      return $beforeDecoding;
    }

    return $decoded;
  }
7892
7893
  /**
   * Encodes an ISO-8859-1 string to UTF-8 and applies the class' CP1252 replacement table
   * to the result.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>
   *                The encoded string, or an empty string if the input is empty or the
   *                encoding step returned false.
   *                </p>
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // a polyfill of "utf8_encode()" may return false
    $encoded = \utf8_encode($str);
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // the replacement table is skipped when the byte 0xC2 does not occur at all
    // (presumably every key of the table contains that byte -- TODO confirm)
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // the search / replace lists are built on the first call only
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$CP1252_TO_UTF8);
      $replace = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($search, $replace, $encoded);
  }
7931
7932
  /**
   * Alias of "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7945
7946
  /**
   * Returns the class' table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7962
7963
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words; values below 1 result in an empty string.</p>
   * @param string $strAddOn <p>Appended to the result if the string was shortened.</p>
   *
   * @return string <p>
   *                The input string if it does not need to be shortened, otherwise the first
   *                "$limit" words (right-trimmed) followed by "$strAddOn".
   *                </p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $limit = (int)$limit;
    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by up to "$limit" words with their trailing whitespace
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    $found = array();
    preg_match($pattern, $str, $found);

    // shorten only if there is a match and the match does not already cover the whole string
    if (
        isset($found[0])
        &&
        self::strlen($str) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
7999
8000
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is split into characters via "UTF8::split()" and a mask with one ASCII
   * byte per character is wrapped with the native "wordwrap()" function; the break
   * positions of the mask are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column, or an empty string
   *                if "$str" or "$break" is empty.
   *                </p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    // "$chars" holds the characters (and the existing breaks) of the input,
    // "$mask" holds one byte per element of "$chars":
    // "#" for an existing break, " " for a space and "?" for everything else
    $chars = array();
    $mask = '';

    foreach (explode($break, $str) as $index => $segment) {

      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // let the native function find the break positions on the mask
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $next = 0;       // index of the next unused element of "$chars"
    $maskIndex = -1; // position in "$mask" that was handled last
    $breakPos = -1;  // position of the current "#" in "$mask"

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {
      // copy the characters in front of the break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$next];
        unset($chars[$next++]);
      }

      // drop the existing break or the space that the new break stands for
      if ($break === $chars[$next] || ' ' === $chars[$next]) {
        unset($chars[$next++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
8067
8068
  /**
   * Returns the class' array of Unicode White Space characters.
   *
   * @return array <p>An array with the numeric code point as key and the White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8077
8078
}
8079