Completed
Push — master ( 410843...d7d49c )
by Lars
14:00 queued 03:01
created

UTF8::isHtml()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 0
cts 0
cp 0
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    // HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Creating an instance only makes sure the environment detection has been run.
   *
   * INFO: all helpers of this class are static, the detection itself lives in UTF8::checkForSupport().
   */
  public function __construct()
  {
    // fills self::$SUPPORT on the first call, later calls return immediately
    self::checkForSupport();
  }
820
821
  /**
   * Fetch a single character by its character offset: multi-byte safe "$str[$pos]".
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for empty input or a negative
   *                position.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;

    // nothing to pick a character from
    if ($input === '') {
      return '';
    }

    $offset = (int)$pos;

    // offsets before the start of the string are not supported
    if ($offset < 0) {
      return '';
    }

    $char = self::substr($input, $offset, 1);

    return (string)$char;
  }
845
846
  /**
   * Make sure the string starts with the UTF-8 BOM.
   *
   * INFO: A string that already has a BOM (as reported by UTF8::string_has_bom()) is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // a BOM is already there => hand the input back untouched
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
   * Convert a binary number (a sequence of "1" and "0") into a string of bytes.
   *
   * The input is read as one big binary number: leading zero bits are not significant and the
   * remaining bits are right-aligned into whole bytes, e.g. "1010" gives "\x0a".
   * Characters other than "0" and "1" are ignored.
   *
   * @param mixed $bin <p>Sequence of 1|0, as string or as integer (e.g. '01100001' or 1100001).</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty input.</p>
   */
  public static function binary_to_str($bin)
  {
    // cast first, so that an integer like 1010 is accepted as well
    $bits = (string)$bin;

    if (!isset($bits[0])) {
      return '';
    }

    // keep only binary digits
    $bits = (string)preg_replace('/[^01]/', '', $bits);

    // leading zero bits carry no value; an all-zero (or digit-less) input is the number 0
    $bits = ltrim($bits, '0');
    if ($bits === '') {
      $bits = '0';
    }

    // Work on whole octets instead of base_convert() + pack('H*'): base_convert() loses precision
    // for long inputs and its result may have an odd number of hex digits, which shifts every nibble.
    $remainder = strlen($bits) % 8;
    if ($remainder !== 0) {
      $bits = str_repeat('0', 8 - $remainder) . $bits;
    }

    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= chr((int)bindec($octet));
    }

    return $bytes;
  }
879
880
  /**
   * Get the Byte Order Mark of UTF-8 (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
891
892
  /**
   * @alias of UTF8::chr_map()
   *
   * Forwards both arguments unchanged and returns whatever UTF8::chr_map() returns.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
   * Detect once which UTF-8 related features this server offers and remember the result
   * in self::$SUPPORT (mbstring, mbstring function overloading, iconv, intl incl. the
   * transliterator ids, IntlChar and PCRE with UTF-8).
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection already ran => nothing to do
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // true only if the constant exists and the string-overload bit is set in the ini setting
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the list of transliterator ids stays empty unless intl can provide it
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              function_exists('transliterator_list_ids') === true;

    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? transliterator_list_ids()
        : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per code point and encoding. If "\IntlChar" is available it does the work,
   * otherwise the UTF-8 bytes are built by hand. In the hand-built path the code point must be a
   * real integer within 0 .. 0x10FFFF, anything else is reported as failure.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init: "<code point><encoding>" => character
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $str = \IntlChar::chr($code_point);

      // a failure (null) must stay a failure, so it is never handed to the converter
      if ($str !== null && $encoding !== 'UTF-8') {
        $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
      }

      $CHAR_CACHE[$cacheKey] = $str;

      return $str;
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    // (failures are not cached: isset() could never find a stored null again)
    if ((int)$code_point !== $code_point) {
      return null;
    }

    // Only 0 .. 0x10FFFF are Unicode code points. Without this check the bit arithmetic below
    // would turn e.g. -1 into the single invalid byte "\xff" instead of reporting a failure.
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1033
1034
  /**
   * Cast the given value to an integer and return the single byte PHP's native chr() gives for it.
   *
   * @param int $int
   *
   * @return string
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    // plain function call: this is PHP's chr(), not UTF8::chr()
    return chr($byteValue);
  }
1043
1044
  /**
   * Run a callback over every character of a string and collect the results.
   *
   * @param string|array $callback <p>Callable that is invoked once per character.</p>
   * @param string       $str      <p>UTF-8 string whose characters are handed to the callback.</p>
   *
   * @return array <p>The callback results, one entry per element returned by UTF8::split().</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1058
1059
  /**
   * Build a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>Byte length of each character, keyed like the result of UTF8::split();
   *               an empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $sizes = array();

    // measure every character in bytes ('8BIT'), keeping the keys of the split result
    foreach (self::split($str) as $index => $character) {
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
1086
1087
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 to 4); every following
   * byte contributes its low bits to the result. An empty string yields 0, and
   * bytes that are missing from a truncated sequence are treated as zero bits
   * instead of triggering an "uninitialized string offset" notice.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded code, or 0 for an empty input.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode: do not read the undefined offset 0
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx (a byte missing from a truncated sequence counts as 0)
      $continuation = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($continuation & ~0x80);
    }

    return $code;
  }
1126
1127
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character.</p>
   * @param string $pfix [optional] <p>Prefix that is handed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for an empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // the numeric entity for NUL is looked up as an empty string
    $lookup = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($lookup), $pfix);
  }
1149
1150
  /**
   * Alias for "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the same input.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1163
1164
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending.
   *
   * The chunks come from UTF8::split() and are glued together with implode(),
   * so $end is placed between chunks and nothing is appended after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string.</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1177
1178
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Processing order: strip bytes that are not part of a well-formed sequence,
   * remove the replacement character via UTF8::replace_diamond_question_mark(),
   * remove invisible characters, then apply the optional steps in the order
   * whitespace normalization, MS Word normalization, BOM removal.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // keep group 1 (runs of well-formed sequences); stray bytes matched by groups 2 and 3 are dropped
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark(
            preg_replace($utf8Pattern, '$1', $str),
            ''
        )
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1226
1227
  /**
   * Clean up a string so that only printable UTF-8 chars remain + fix UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for an empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() arguments: remove BOM (true), normalize whitespace (true),
    // do not normalize MS Word chars (false), keep non-breaking-spaces (true);
    // clean() itself also drops non UTF-8 symbols, the diamond question mark
    // and invisible characters (e.g. "\0")
    return (string)self::clean($fixed, true, true, false, true);
  }
1254
1255
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If true, the code points are returned in U+xxxx format,
   *                                    by default they are returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $className = '\\voku\\helper\\UTF8';

    // a plain string is split into characters first; anything else is mapped as given
    $chars = (is_string($arg) === true) ? self::split($arg) : $arg;

    $codePoints = array_map(array($className, 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(array($className, 'int_to_hex'), $codePoints);
  }
1292
1293
  /**
   * Returns count of characters used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array with the characters as keys and
   *               their number of occurrences as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // one array element per character (chunk length 1)
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1306
1307
  /**
   * Converts an int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") and decoded
   * with UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only added when Bootup reports PHP 5.4 or newer
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1324
1325
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 string and you don't mess up the string.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the input string when nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);
    $forced = ($force === true);

    // nothing to do: detection failed, or (without force) the string already has the target encoding
    if ($encodingDetected === false || (!$forced && $encodingDetected === $encoding)) {
      return $str;
    }

    if ($encoding === 'UTF-8') {
      $utf8Sources = array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1');

      if ($forced || in_array($encodingDetected, $utf8Sources, true)) {
        return self::to_utf8($str);
      }
    }

    if ($encoding === 'ISO-8859-1') {
      if ($forced || $encodingDetected === 'ISO-8859-1' || $encodingDetected === 'UTF-8') {
        return self::to_iso8859($str);
      }
    }

    $needsMbstring = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');

    // only a warning: the conversion below is attempted anyway
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // a falsy conversion result falls back to the unconverted input
    return $strEncoded ? $strEncoded : $str;
  }
1415
1416
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * NOTE(review): $filename is passed through filter_var(..., FILTER_SANITIZE_STRING) before it is read;
   *               that filter can alter names containing quotes or tags - confirm this is intended.
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string         $filename      <p>Name of the file to read.</p>
   * @param int|false|null $flags         [optional] <p>
   *                                      Handed to the native file_get_contents() as its second argument;
   *                                      every falsy value is replaced by false.<br>
   *                                      Flags named by the PHP manual:<br>
   *                                      FILE_USE_INCLUDE_PATH: Search for filename in the include directory.
   *                                      See include_path for more information.<br>
   *                                      FILE_TEXT: As of PHP 6, the default encoding of the read data is UTF-8.
   *                                      You can specify a different encoding by creating a custom context or by
   *                                      changing the default using stream_default_encoding. This flag cannot be
   *                                      used with FILE_BINARY.<br>
   *                                      FILE_BINARY: With this flag, the file is read in binary mode. This is
   *                                      the default setting and cannot be used with FILE_TEXT.
   *                                      </p>
   * @param resource|null  $context       [optional] <p>
   *                                      A valid context resource created with stream_context_create. If it is
   *                                      null and $timeout is non-zero, a context that only carries the HTTP
   *                                      timeout is created.
   *                                      </p>
   * @param int|null       $offset        [optional] <p>The offset where the reading starts; null means 0.</p>
   * @param int|null       $maxLength     [optional] <p>
   *                                      Maximum length of data read; it is only forwarded when it is an integer.
   *                                      The default is to read until end of file is reached.
   *                                      </p>
   * @param int            $timeout       <p>The time in seconds for the timeout.</p>
   * @param boolean        $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                      or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // no caller-supplied context: build one that only carries the HTTP timeout
    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    $flags = $flags ? $flags : false;
    $offset = ($offset === null) ? 0 : $offset;

    // the length argument is only passed on when it really is an integer
    $data = (is_int($maxLength) === true)
        ? file_get_contents($filename, $flags, $context, $offset, $maxLength)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // convert to UTF-8 without forcing, then clean up the result
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
1541
1542
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with file_get_contents() and the content is handed
   * to UTF8::string_has_bom(). When the file cannot be read, false is returned
   * instead of passing the failure value on as if it were file content.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (including the case that the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // an unreadable file cannot be shown to start with a BOM
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1553
1554
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (element by element / property by property),
   * strings are normalized, every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form handed to \Normalizer, default 4 (NFC).</p>
   * @param string $leading_combining  <p>Prefix that is put in front of a string which starts with a
   *                                   combining mark (\p{Mn}); an empty string disables the prefix.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // marker: non-empty, so the leading-combining check below stays enabled
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      if (isset($normalized[0])) {
        $var = $normalized;
      } else {
        // normalization produced nothing usable: re-encode as UTF-8 instead
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithNonAscii = ($var[0] >= "\x80");

    if (
        $startsWithNonAscii
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1617
1618
  /**
1619
   * "filter_input()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1620
   *
1621
   * Gets a specific external variable by name and optionally filters it
1622
   *
1623
   * @link  http://php.net/manual/en/function.filter-input.php
1624
   *
1625
   * @param int    $type          <p>
1626
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1627
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1628
   *                              <b>INPUT_ENV</b>.
1629
   *                              </p>
1630
   * @param string $variable_name <p>
1631
   *                              Name of a variable to get.
1632
   *                              </p>
1633
   * @param int    $filter        [optional] <p>
1634
   *                              The ID of the filter to apply. The
1635
   *                              manual page lists the available filters.
1636
   *                              </p>
1637
   * @param mixed  $options       [optional] <p>
1638
   *                              Associative array of options or bitwise disjunction of flags. If filter
1639
   *                              accepts options, flags can be provided in "flags" field of array.
1640
   *                              </p>
1641
   *
1642
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1643
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1644
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1645
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1646
   * @since 5.2.0
1647
   */
1648 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really passed a fourth argument
    $var = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // normalize whatever the native filter returned
    return self::filter($var);
  }
1658
1659
  /**
1660
   * "filter_input_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1661
   *
1662
   * Gets external variables and optionally filters them
1663
   *
1664
   * @link  http://php.net/manual/en/function.filter-input-array.php
1665
   *
1666
   * @param int   $type       <p>
1667
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1668
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1669
   *                          <b>INPUT_ENV</b>.
1670
   *                          </p>
1671
   * @param mixed $definition [optional] <p>
1672
   *                          An array defining the arguments. A valid key is a string
1673
   *                          containing a variable name and a valid value is either a filter type, or an array
1674
   *                          optionally specifying the filter, flags and options. If the value is an
1675
   *                          array, valid keys are filter which specifies the
1676
   *                          filter type,
1677
   *                          flags which specifies any flags that apply to the
1678
   *                          filter, and options which specifies any options that
1679
   *                          apply to the filter. See the example below for a better understanding.
1680
   *                          </p>
1681
   *                          <p>
1682
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1683
   *                          input array are filtered by this filter.
1684
   *                          </p>
1685
   * @param bool  $add_empty  [optional] <p>
1686
   *                          Add missing keys as <b>NULL</b> to the return value.
1687
   *                          </p>
1688
   *
1689
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1690
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1691
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1692
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1693
   * fails.
1694
   * @since 5.2.0
1695
   */
1696 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with a single argument the native defaults for definition / add_empty apply
    $a = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // normalize whatever the native filter returned
    return self::filter($a);
  }
1706
1707
  /**
1708
   * "filter_var()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1709
   *
1710
   * Filters a variable with a specified filter
1711
   *
1712
   * @link  http://php.net/manual/en/function.filter-var.php
1713
   *
1714
   * @param mixed $variable <p>
1715
   *                        Value to filter.
1716
   *                        </p>
1717
   * @param int   $filter   [optional] <p>
1718
   *                        The ID of the filter to apply. The
1719
   *                        manual page lists the available filters.
1720
   *                        </p>
1721
   * @param mixed $options  [optional] <p>
1722
   *                        Associative array of options or bitwise disjunction of flags. If filter
1723
   *                        accepts options, flags can be provided in "flags" field of array. For
1724
   *                        the "callback" filter, callable type should be passed. The
1725
   *                        callback must accept one argument, the value to be filtered, and return
1726
   *                        the value after filtering/sanitizing it.
1727
   *                        </p>
1728
   *                        <p>
1729
   *                        <code>
1730
   *                        // for filters that accept options, use this format
1731
   *                        $options = array(
1732
   *                        'options' => array(
1733
   *                        'default' => 3, // value to return if the filter fails
1734
   *                        // other options here
1735
   *                        'min_range' => 0
1736
   *                        ),
1737
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1738
   *                        );
1739
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1740
   *                        // for filter that only accept flags, you can pass them directly
1741
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1742
   *                        // for filter that only accept flags, you can also pass as an array
1743
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1744
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1745
   *                        // callback validate filter
1746
   *                        function foo($value)
1747
   *                        {
1748
   *                        // Expected format: Surname, GivenNames
1749
   *                        if (strpos($value, ", ") === false) return false;
1750
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1751
   *                        $empty = (empty($surname) || empty($givennames));
1752
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1753
   *                        if ($empty || $notstrings) {
1754
   *                        return false;
1755
   *                        } else {
1756
   *                        return $value;
1757
   *                        }
1758
   *                        }
1759
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1760
   *                        </code>
1761
   *                        </p>
1762
   *
1763
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1764
   * @since 5.2.0
1765
   */
1766 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Only forward $options when the caller actually passed a third argument.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // The native result is handed to self::filter() before it is returned.
    return self::filter($filtered);
  }
1776
1777
  /**
1778
   * "filter_var_array()"-wrapper that normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1779
   *
1780
   * Gets multiple variables and optionally filters them
1781
   *
1782
   * @link  http://php.net/manual/en/function.filter-var-array.php
1783
   *
1784
   * @param array $data       <p>
1785
   *                          An array with string keys containing the data to filter.
1786
   *                          </p>
1787
   * @param mixed $definition [optional] <p>
1788
   *                          An array defining the arguments. A valid key is a string
1789
   *                          containing a variable name and a valid value is either a
1790
   *                          filter type, or an
1791
   *                          array optionally specifying the filter, flags and options.
1792
   *                          If the value is an array, valid keys are filter
1793
   *                          which specifies the filter type,
1794
   *                          flags which specifies any flags that apply to the
1795
   *                          filter, and options which specifies any options that
1796
   *                          apply to the filter. See the example below for a better understanding.
1797
   *                          </p>
1798
   *                          <p>
1799
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1800
   *                          input array are filtered by this filter.
1801
   *                          </p>
1802
   * @param bool  $add_empty  [optional] <p>
1803
   *                          Add missing keys as <b>NULL</b> to the return value.
1804
   *                          </p>
1805
   *
1806
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1807
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1808
   * the variable is not set.
1809
   * @since 5.2.0
1810
   */
1811 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With only $data supplied, use the single-argument form of the native function.
    $result = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // The native result is handed to self::filter() before it is returned.
    return self::filter($result);
  }
1821
1822
  /**
   * Tell whether a string has at most the given number of characters.
   *
   * The length is measured with "UTF8::strlen()".
   *
   * @param string $str      The string to measure.
   * @param int    $box_size The maximum number of characters allowed.
   *
   * @return bool true if the length of $str is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1834
1835
  /**
   * Repair simple cases of broken UTF-8 via a fixed search/replace table.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Use this function if you received an UTF-8 string that was converted from Windows-1252 as if it
   * was ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every key of the "BROKEN_UTF8_FIX" table replaced by its value;
   *                an empty string for empty input.</p>
   */
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // The search/replace lists are derived from the table once and then reused.
    static $LOOKUP_CACHE = null;

    if ($LOOKUP_CACHE === null) {
      $LOOKUP_CACHE = array(
          'broken' => array_keys(self::$BROKEN_UTF8_FIX),
          'fixed'  => array_values(self::$BROKEN_UTF8_FIX),
      );
    }

    return str_replace($LOOKUP_CACHE['broken'], $LOOKUP_CACHE['fixed'], $str);
  }
1867
1868
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively); keys are kept.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Decode and re-encode until a round trip no longer changes the value.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $str = self::to_utf8(
          self::utf8_decode($previous)
      );
    }

    return $str;
  }
1900
1901
  /**
   * Get the text direction of a specific character.
   *
   * If the "intlChar" support flag is set, the value of "IntlChar::charDirection()" is
   * checked first. When that value is in neither of the two lists below (or the flag is
   * not set), the code point from "UTF8::chr_to_decimal()" is looked up in a built-in
   * table of right-to-left code points.
   *
   * @param string $char <p>The character to inspect.</p>
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction values taken from the "IntlChar"-Class
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = self::chr_to_decimal($char);

    // Every entry of the tables below lies within 0x5be .. 0x10b7f.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Single right-to-left code points (compared strictly).
    static $RTL_SINGLES = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    // Inclusive right-to-left ranges: array(first, last).
    static $RTL_RANGES = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    if (in_array($c, $RTL_SINGLES, true)) {
      return 'RTL';
    }

    foreach ($RTL_RANGES as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2022
2023
  /**
   * Get data from "/data/*.php" (relative to the directory of this file).
   *
   * @param string $file <p>Name of the data file, without the directory and without the ".php" suffix.</p>
   *
   * @return bool|string|array|int <p>The value returned by the required file;
   *                               false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2040
2041
  /**
   * Report which php-features were detected.
   *
   * Runs "UTF8::checkForSupport()" first if the support-"array" is not marked as checked yet.
   *
   * @param string|null $key
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo($key = null)
  {
    $alreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2066
2067
  /**
   * Alias of "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom($str)
  {
    $result = self::string_has_bom($str);

    return $result;
  }
2082
2083
  /**
   * Turn a hexadecimal value into the matching UTF-8 character.
   *
   * The value is converted with "hexdec()" and then passed to "UTF8::decimal_to_chr()".
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr($hexdec)
  {
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2094
2095
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * Accepted forms are 4 to 6 hexadecimal digits, optionally prefixed with "\u" or "U+"
   * (case-insensitive). Anything else, including digits outside 0-9 / a-f, yields false.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hex digits are allowed: "intval()" would silently turn other letters into 0.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2118
2119
  /**
   * Alias of "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2134
2135
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the converted range starts at 0x80, otherwise at 0x00.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The convmap is a list of 4-element groups (start, end, offset, mask);
      // a stray fifth element is rejected by newer PHP versions.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2184
2185
  /**
2186
   * UTF-8 version of html_entity_decode()
2187
   *
2188
   * The reason we are not using html_entity_decode() by itself is because
2189
   * while it is not technically correct to leave out the semicolon
2190
   * at the end of an entity most browsers will still interpret the entity
2191
   * correctly. html_entity_decode() does not convert entities without
2192
   * semicolons, so we are left with our own little solution here. Bummer.
2193
   *
2194
   * Convert all HTML entities to their applicable characters
2195
   *
2196
   * INFO: opposite to UTF8::html_encode()
2197
   *
2198
   * @link http://php.net/manual/en/function.html-entity-decode.php
2199
   *
2200
   * @param string $str      <p>
2201
   *                         The input string.
2202
   *                         </p>
2203
   * @param int    $flags    [optional] <p>
2204
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2205
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2206
   *                         <table>
2207
   *                         Available <i>flags</i> constants
2208
   *                         <tr valign="top">
2209
   *                         <td>Constant Name</td>
2210
   *                         <td>Description</td>
2211
   *                         </tr>
2212
   *                         <tr valign="top">
2213
   *                         <td><b>ENT_COMPAT</b></td>
2214
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2215
   *                         </tr>
2216
   *                         <tr valign="top">
2217
   *                         <td><b>ENT_QUOTES</b></td>
2218
   *                         <td>Will convert both double and single quotes.</td>
2219
   *                         </tr>
2220
   *                         <tr valign="top">
2221
   *                         <td><b>ENT_NOQUOTES</b></td>
2222
   *                         <td>Will leave both double and single quotes unconverted.</td>
2223
   *                         </tr>
2224
   *                         <tr valign="top">
2225
   *                         <td><b>ENT_HTML401</b></td>
2226
   *                         <td>
2227
   *                         Handle code as HTML 4.01.
2228
   *                         </td>
2229
   *                         </tr>
2230
   *                         <tr valign="top">
2231
   *                         <td><b>ENT_XML1</b></td>
2232
   *                         <td>
2233
   *                         Handle code as XML 1.
2234
   *                         </td>
2235
   *                         </tr>
2236
   *                         <tr valign="top">
2237
   *                         <td><b>ENT_XHTML</b></td>
2238
   *                         <td>
2239
   *                         Handle code as XHTML.
2240
   *                         </td>
2241
   *                         </tr>
2242
   *                         <tr valign="top">
2243
   *                         <td><b>ENT_HTML5</b></td>
2244
   *                         <td>
2245
   *                         Handle code as HTML 5.
2246
   *                         </td>
2247
   *                         </tr>
2248
   *                         </table>
2249
   *                         </p>
2250
   * @param string $encoding [optional] <p>Encoding to use.</p>
2251
   *
2252
   * @return string <p>The decoded string.</p>
2253
   */
2254 17
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Strings shorter than four bytes are returned unchanged (examples: "&;" or "&x;").
    if (!isset($str[3])) {
      return $str;
    }

    // Return early without an ampersand, or when neither "&#" nor ";" occurs.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252') {
      // Make sure the support-"array" is populated before it is read,
      // as the other methods of this class do.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Repeat until a pass no longer changes the string (handles nested encodings).
    do {
      $str_compare = $str;

      // Decimal entities with a trailing semicolon; quote characters stay encoded here.
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (a missing semicolon is appended to numeric entities first)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2329
2330
  /**
2331
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2332
   *
2333
   * @link http://php.net/manual/en/function.htmlentities.php
2334
   *
2335
   * @param string $str           <p>
2336
   *                              The input string.
2337
   *                              </p>
2338
   * @param int    $flags         [optional] <p>
2339
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2340
   *                              invalid code unit sequences and the used document type. The default is
2341
   *                              ENT_COMPAT | ENT_HTML401.
2342
   *                              <table>
2343
   *                              Available <i>flags</i> constants
2344
   *                              <tr valign="top">
2345
   *                              <td>Constant Name</td>
2346
   *                              <td>Description</td>
2347
   *                              </tr>
2348
   *                              <tr valign="top">
2349
   *                              <td><b>ENT_COMPAT</b></td>
2350
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2351
   *                              </tr>
2352
   *                              <tr valign="top">
2353
   *                              <td><b>ENT_QUOTES</b></td>
2354
   *                              <td>Will convert both double and single quotes.</td>
2355
   *                              </tr>
2356
   *                              <tr valign="top">
2357
   *                              <td><b>ENT_NOQUOTES</b></td>
2358
   *                              <td>Will leave both double and single quotes unconverted.</td>
2359
   *                              </tr>
2360
   *                              <tr valign="top">
2361
   *                              <td><b>ENT_IGNORE</b></td>
2362
   *                              <td>
2363
   *                              Silently discard invalid code unit sequences instead of returning
2364
   *                              an empty string. Using this flag is discouraged as it
2365
   *                              may have security implications.
2366
   *                              </td>
2367
   *                              </tr>
2368
   *                              <tr valign="top">
2369
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2370
   *                              <td>
2371
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2372
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2373
   *                              </td>
2374
   *                              </tr>
2375
   *                              <tr valign="top">
2376
   *                              <td><b>ENT_DISALLOWED</b></td>
2377
   *                              <td>
2378
   *                              Replace invalid code points for the given document type with a
2379
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2380
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2381
   *                              instance, to ensure the well-formedness of XML documents with
2382
   *                              embedded external content.
2383
   *                              </td>
2384
   *                              </tr>
2385
   *                              <tr valign="top">
2386
   *                              <td><b>ENT_HTML401</b></td>
2387
   *                              <td>
2388
   *                              Handle code as HTML 4.01.
2389
   *                              </td>
2390
   *                              </tr>
2391
   *                              <tr valign="top">
2392
   *                              <td><b>ENT_XML1</b></td>
2393
   *                              <td>
2394
   *                              Handle code as XML 1.
2395
   *                              </td>
2396
   *                              </tr>
2397
   *                              <tr valign="top">
2398
   *                              <td><b>ENT_XHTML</b></td>
2399
   *                              <td>
2400
   *                              Handle code as XHTML.
2401
   *                              </td>
2402
   *                              </tr>
2403
   *                              <tr valign="top">
2404
   *                              <td><b>ENT_HTML5</b></td>
2405
   *                              <td>
2406
   *                              Handle code as HTML 5.
2407
   *                              </td>
2408
   *                              </tr>
2409
   *                              </table>
2410
   *                              </p>
2411
   * @param string $encoding      [optional] <p>
2412
   *                              Like <b>htmlspecialchars</b>,
2413
   *                              <b>htmlentities</b> takes an optional third argument
2414
   *                              <i>encoding</i> which defines encoding used in
2415
   *                              conversion.
2416
   *                              Although this argument is technically optional, you are highly
2417
   *                              encouraged to specify the correct value for your code.
2418
   *                              </p>
2419
   * @param bool   $double_encode [optional] <p>
2420
   *                              When <i>double_encode</i> is turned off PHP will not
2421
   *                              encode existing html entities. The default is to convert everything.
2422
   *                              </p>
2423
   *
2424
   *
2425
   * @return string the encoded string.
2426
   * </p>
2427
   * <p>
2428
   * If the input <i>string</i> contains an invalid code unit
2429
   * sequence within the given <i>encoding</i> an empty string
2430
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2431
   * <b>ENT_SUBSTITUTE</b> flags are set.
2432
   */
2433 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native function leaves backslashes alone, so they are replaced
     * by their html entity equivalent right after the native call.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra pass below only runs for UTF-8.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect each distinct character of three or more bytes once,
    // together with its numbered entity.
    $from = array();
    $to = array();
    foreach (self::chr_size_list($encoded) as $position => $bytes) {
      if ($bytes < 3) {
        continue;
      }

      $chr = self::access($encoded, $position);
      if (isset($to[$chr])) {
        continue;
      }

      $from[$chr] = $chr;
      $to[$chr] = self::html_encode($chr);
    }

    return str_replace($from, $to, $encoded);
  }
2471
2472
  /**
2473
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2474
   *
2475
   * INFO: Take a look at "UTF8::htmlentities()"
2476
   *
2477
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2478
   *
2479
   * @param string $str           <p>
2480
   *                              The string being converted.
2481
   *                              </p>
2482
   * @param int    $flags         [optional] <p>
2483
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2484
   *                              invalid code unit sequences and the used document type. The default is
2485
   *                              ENT_COMPAT | ENT_HTML401.
2486
   *                              <table>
2487
   *                              Available <i>flags</i> constants
2488
   *                              <tr valign="top">
2489
   *                              <td>Constant Name</td>
2490
   *                              <td>Description</td>
2491
   *                              </tr>
2492
   *                              <tr valign="top">
2493
   *                              <td><b>ENT_COMPAT</b></td>
2494
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2495
   *                              </tr>
2496
   *                              <tr valign="top">
2497
   *                              <td><b>ENT_QUOTES</b></td>
2498
   *                              <td>Will convert both double and single quotes.</td>
2499
   *                              </tr>
2500
   *                              <tr valign="top">
2501
   *                              <td><b>ENT_NOQUOTES</b></td>
2502
   *                              <td>Will leave both double and single quotes unconverted.</td>
2503
   *                              </tr>
2504
   *                              <tr valign="top">
2505
   *                              <td><b>ENT_IGNORE</b></td>
2506
   *                              <td>
2507
   *                              Silently discard invalid code unit sequences instead of returning
2508
   *                              an empty string. Using this flag is discouraged as it
2509
   *                              may have security implications.
2510
   *                              </td>
2511
   *                              </tr>
2512
   *                              <tr valign="top">
2513
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2514
   *                              <td>
2515
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2516
   *                              U+FFFD (UTF-8) or &amp;#xFFFD; (otherwise) instead of returning an empty string.
2517
   *                              </td>
2518
   *                              </tr>
2519
   *                              <tr valign="top">
2520
   *                              <td><b>ENT_DISALLOWED</b></td>
2521
   *                              <td>
2522
   *                              Replace invalid code points for the given document type with a
2523
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &amp;#xFFFD;
2524
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2525
   *                              instance, to ensure the well-formedness of XML documents with
2526
   *                              embedded external content.
2527
   *                              </td>
2528
   *                              </tr>
2529
   *                              <tr valign="top">
2530
   *                              <td><b>ENT_HTML401</b></td>
2531
   *                              <td>
2532
   *                              Handle code as HTML 4.01.
2533
   *                              </td>
2534
   *                              </tr>
2535
   *                              <tr valign="top">
2536
   *                              <td><b>ENT_XML1</b></td>
2537
   *                              <td>
2538
   *                              Handle code as XML 1.
2539
   *                              </td>
2540
   *                              </tr>
2541
   *                              <tr valign="top">
2542
   *                              <td><b>ENT_XHTML</b></td>
2543
   *                              <td>
2544
   *                              Handle code as XHTML.
2545
   *                              </td>
2546
   *                              </tr>
2547
   *                              <tr valign="top">
2548
   *                              <td><b>ENT_HTML5</b></td>
2549
   *                              <td>
2550
   *                              Handle code as HTML 5.
2551
   *                              </td>
2552
   *                              </tr>
2553
   *                              </table>
2554
   *                              </p>
2555
   * @param string $encoding      [optional] <p>
2556
   *                              Defines encoding used in conversion.
2557
   *                              </p>
2558
   *                              <p>
2559
   *                              For the purposes of this function, the encodings
2560
   *                              ISO-8859-1, ISO-8859-15,
2561
   *                              UTF-8, cp866,
2562
   *                              cp1251, cp1252, and
2563
   *                              KOI8-R are effectively equivalent, provided the
2564
   *                              <i>string</i> itself is valid for the encoding, as
2565
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2566
   *                              the same positions in all of these encodings.
2567
   *                              </p>
2568
   * @param bool   $double_encode [optional] <p>
2569
   *                              When <i>double_encode</i> is turned off PHP will not
2570
   *                              encode existing html entities, the default is to convert everything.
2571
   *                              </p>
2572
   *
2573
   * @return string <p>The converted string.</p>
   *                <p>
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
2580
   */
2581 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The literal "UTF-8" is used as-is; every other value is first passed
    // through self::normalize_encoding().
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2589
2590
  /**
   * Checks whether iconv is available on the server.
   *
   * Side effect: when the extension is loaded and "Bootup::is_php('5.6')" reports
   * <strong>false</strong>, the iconv input-, output- and internal-encoding are set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only configure iconv when the extension really exists: without it
    // "iconv_set_encoding()" is an undefined function and calling it is fatal.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2610
2611
  /**
   * Alias of "UTF8::decimal_to_chr()": the argument is forwarded unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Value handed to "UTF8::decimal_to_chr()".</p>
   *
   * @return string
   */
  public static function int_to_chr($int)
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2624
2625
  /**
   * Converts an integer into its hexadecimal "U+xxxx" code point notation.
   *
   * The hex digits come from "dechex()" (lower case) and are left-padded with
   * zeros to at least four digits.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix put in front of the hex digits.</p>
   *
   * @return string <p>The code point, or an empty string if $int is not of type integer.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // only real integers are accepted (numeric strings, floats, ... are not)
    if ($int !== (int)$int) {
      return '';
    }

    $digits = str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2647
2648
  /**
   * Checks whether intl-char is available on the server.
   *
   * Both conditions must hold: "Bootup::is_php('7.0')" is true and the class "IntlChar" exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    // the class lookup is skipped entirely when the version check fails
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2661
2662
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    $loaded = (bool)extension_loaded('intl');

    return $loaded;
  }
2671
2672
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii($str)
  {
    $result = self::is_ascii($str);

    return $result;
  }
2687
2688
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str)
  {
    $result = self::is_base64($str);

    return $result;
  }
2703
2704
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>The input to check.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str)
  {
    $result = self::is_binary($str);

    return $result;
  }
2719
2720
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>The string that is compared against the known byte order marks.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom($utf8_chr)
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2735
2736
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml($str)
  {
    $result = self::is_html($str);

    return $result;
  }
2751
2752
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson($str)
  {
    $result = self::is_json($str);

    return $result;
  }
2767
2768
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p><strong>false</strong> if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    $result = self::is_utf16($str);

    return $result;
  }
2783
2784
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p><strong>false</strong> if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    $result = self::is_utf32($str);

    return $result;
  }
2799
2800
  /**
   * Deprecated camelCase alias: forwards both arguments to "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Passed through to "UTF8::is_utf8()".</p>
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false)
  {
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2816
2817
  /**
   * Checks if a string contains only the ASCII bytes accepted by this helper.
   *
   * Accepted bytes are \x09, \x0A, \x0D, \x10, \x13 and the printable range \x20-\x7E;
   * any other byte makes the check fail. The empty string counts as ASCII.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII<br>
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $text = (string)$str;

    if ($text === '') {
      return true;
    }

    // look for the first byte that is NOT on the white-list
    $foundForeignByte = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $text);

    return $foundForeignByte ? false : true;
  }
2837
2838
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is decoded in strict mode and encoded again; it only counts as
   * base64 when that round trip reproduces the input exactly. The empty string
   * is never considered base64.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = base64_decode($str, true);

    // Compare against "false" explicitly: a plain truthiness check would reject
    // valid input whose decoded payload is the string "0" (e.g. "MA==").
    return $decoded !== false && base64_encode($decoded) === $str;
  }
2860
2861
  /**
   * Heuristic check whether the input is binary.
   *
   * After a string cast the input is treated as binary when it is either a non-empty
   * run of "0" / "1" characters or contains at least one NUL byte. The empty
   * string is not binary.
   *
   * @param mixed $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    $bytes = (string)$input;

    if ($bytes === '') {
      return false;
    }

    // textual bit strings such as "010011"
    if (preg_match('~^[01]+$~', $bytes)) {
      return true;
    }

    // A single NUL byte is enough; a "share of NUL bytes" threshold would be
    // implied by this test anyway.
    return substr_count($bytes, "\x00") > 0;
  }
2891
2892
  /**
   * Check if the file is binary.
   *
   * Reads up to the first 512 bytes of the file and hands them to "UTF8::is_binary()".
   * If the file cannot be opened or read, an empty string is checked instead.
   *
   * @param string $file <p>Path handed to "fopen()".</p>
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // "fopen()" signals failure by returning false, so never pass that on to "fread()" / "fclose()"
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // reached e.g. when an error handler turns warnings into exceptions
      if (is_resource($fp)) {
        fclose($fp);
      }

      $block = '';
    }

    return self::is_binary($block);
  }
2911
2912
  /**
   * Checks if the given string is identical to one of the keys of "self::$BOM".
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // strict lookup: type and value must both match one of the keys
    return in_array($str, array_keys(self::$BOM), true);
  }
2931
2932
  /**
   * Check if the string contains at least one html-tag (opening, closing or self-closing).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return boolean <p><strong>true</strong> if a tag was found, <strong>false</strong> otherwise
   *                 (including for the empty string).</p>
   */
  public static function is_html($str)
  {
    $html = (string)$str;

    if ($html === '') {
      return false;
    }

    // "<" + optional "/" + tag name + optional attributes + optional "/" + ">"
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    return preg_match($tagPattern, $html) === 1;
  }
2958
2959
  /**
   * Try to check if "$str" is a json-string.
   *
   * The string is decoded via "UTF8::json_decode()"; it only counts as JSON when
   * decoding reported no error and produced an object or an array.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    if (json_last_error() !== JSON_ERROR_NONE) {
      return false;
    }

    return is_object($decoded) === true || is_array($decoded) === true;
  }
2990
2991
  /**
   * Check if the string is UTF-16.
   *
   * Only input that "UTF8::is_binary()" accepts (after BOM removal) is examined. For each
   * byte order the string is converted to UTF-8 and round-tripped; if the round trip is
   * stable, a score is computed from the character statistics of "UTF8::count_chars()".
   * The byte order with the higher score wins, equal scores mean "not UTF-16".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    $scores = array();

    // little endian is evaluated first, then big endian
    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> candidate -> UTF-8 must reproduce the first conversion
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $sourceChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3050
3051
  /**
   * Check if the string is UTF-32.
   *
   * Only input that "UTF8::is_binary()" accepts (after BOM removal) is examined. For each
   * byte order the string is converted to UTF-8 and round-tripped; if the round trip is
   * stable, a score is computed from the character statistics of "UTF8::count_chars()".
   * The byte order with the higher score wins, equal scores mean "not UTF-32".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    $scores = array();

    // little endian is evaluated first, then big endian
    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $scores[$candidate] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$asUtf8) {
        continue;
      }

      // UTF-8 -> candidate -> UTF-8 must reproduce the first conversion
      $backToCandidate = \mb_convert_encoding($asUtf8, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToCandidate, 'UTF-8', $candidate);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $sourceChars, true) === true) {
          $scores[$candidate]++;
        }
      }
    }

    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3110
3111
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * The check walks over the raw bytes and rejects: stray continuation octets, illegal lead
   * octets, non-shortest (over-long) forms, 5- and 6-octet sequences, surrogate code points,
   * code points above U+10FFFF and sequences that are cut off at the end of the string.
   * The empty string is valid.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // Deliberately no PCRE "/u" shortcut here: the validation below works on raw
    // bytes, so it gives the same answer whether or not PCRE supports UTF-8.

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0; // code point assembled so far
    $mBytes = 1; // total number of octets announced by the current lead octet

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length of the string, also when "strlen()" is overloaded by mbstring
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation: merge its 6 payload bits into the code point.
          $shift = ($mState - 1) * 6;
          $mUcs4 |= ($in & 0x0000003F) << $shift;

          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }

            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3262
3263
  /**
3264
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3265
   * Decodes a JSON string
3266
   *
3267
   * @link http://php.net/manual/en/function.json-decode.php
3268
   *
3269
   * @param string $json    <p>
3270
   *                        The <i>json</i> string being decoded.
3271
   *                        </p>
3272
   *                        <p>
3273
   *                        This function only works with UTF-8 encoded strings.
3274
   *                        </p>
3275
   *                        <p>PHP implements a superset of
3276
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3277
   *                        only supports these values when they are nested inside an array or an object.
3278
   *                        </p>
3279
   * @param bool   $assoc   [optional] <p>
3280
   *                        When <b>TRUE</b>, returned objects will be converted into
3281
   *                        associative arrays.
3282
   *                        </p>
3283
   * @param int    $depth   [optional] <p>
3284
   *                        User specified recursion depth.
3285
   *                        </p>
3286
   * @param int    $options [optional] <p>
3287
   *                        Bitmask of JSON decode options. Currently only
3288
   *                        <b>JSON_BIGINT_AS_STRING</b>
3289
   *                        is supported (default is to cast large integers as floats)
3290
   *                        </p>
3291
   *
3292
   * @return mixed the value encoded in <i>json</i> in appropriate
3293
   * PHP type. Values true, false and
3294
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3295
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3296
   * <i>json</i> cannot be decoded or if the encoded
3297
   * data is deeper than the recursion limit.
3298
   */
3299 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // run the input through self::filter() and force it to a string before decoding
    $filtered = (string)self::filter($json);

    // "$options" is only handed to json_decode() when Bootup::is_php('5.4') is true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3311
3312
  /**
3313
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3314
   * Returns the JSON representation of a value.
3315
   *
3316
   * @link http://php.net/manual/en/function.json-encode.php
3317
   *
3318
   * @param mixed $value   <p>
3319
   *                       The <i>value</i> being encoded. Can be any type except
3320
   *                       a resource.
3321
   *                       </p>
3322
   *                       <p>
3323
   *                       All string data must be UTF-8 encoded.
3324
   *                       </p>
3325
   *                       <p>PHP implements a superset of
3326
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3327
   *                       only supports these values when they are nested inside an array or an object.
3328
   *                       </p>
3329
   * @param int   $options [optional] <p>
3330
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3331
   *                       <b>JSON_HEX_TAG</b>,
3332
   *                       <b>JSON_HEX_AMP</b>,
3333
   *                       <b>JSON_HEX_APOS</b>,
3334
   *                       <b>JSON_NUMERIC_CHECK</b>,
3335
   *                       <b>JSON_PRETTY_PRINT</b>,
3336
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3337
   *                       <b>JSON_FORCE_OBJECT</b>,
3338
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3339
   *                       constants is described on
3340
   *                       the JSON constants page.
3341
   *                       </p>
3342
   * @param int   $depth   [optional] <p>
3343
   *                       Set the maximum depth. Must be greater than zero.
3344
   *                       </p>
3345
   *
3346
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3347
   */
3348 2 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // run the value through self::filter() before encoding
    $filtered = self::filter($value);

    // "$depth" is only handed to json_encode() when Bootup::is_php('5.5') is true
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3360
3361
  /**
   * Lowercase the first character of a string and leave the rest untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character ("false" from substr is treated as "nothing left")
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // the first character, lower-cased
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . ($tail === false ? '' : $tail);
  }
3385
3386
  /**
   * Alias for "UTF8::lcfirst()": lowercase the first character of a word.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word      <p>The input word.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure delegation, all arguments are forwarded unchanged
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3401
3402
  /**
   * Lowercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", each non-excluded chunk is passed
   * through "UTF8::lcfirst()" and the chunks are concatenated again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly).</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The converted string, or an empty string for empty input.</p>
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Only a really empty string is "empty": "0" is falsy in PHP but is valid content.
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Skip empty chunks only; a plain falsy check would silently drop a "0" chunk
      // from the result.
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3451
3452
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped. If omitted (or empty),
   *                      Unicode separators and control/other characters (\pZ, \pC) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid one-character list (e.g. stripping leading zeros),
    // so it must not fall back to the default whitespace handling.
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars === true) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3475
3476
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point of all given characters.</p>
   */
  public static function max($arg)
  {
    // an array of strings is treated as one concatenated string
    $text = (is_array($arg) === true) ? implode('', $arg) : $arg;

    $codePoints = self::codepoints($text);

    return self::chr(max($codePoints));
  }
3491
3492
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Largest value reported by "UTF8::chr_size_list()", or 0 if that list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3509
3510
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3525
3526
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point of all given characters.</p>
   */
  public static function min($arg)
  {
    // an array of strings is treated as one concatenated string
    $text = (is_array($arg) === true) ? implode('', $arg) : $arg;

    $codePoints = self::codepoints($text);

    return self::chr(min($codePoints));
  }
3541
3542
  /**
   * Alias for "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Value returned when no encoding is given.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    // pure delegation, kept only for backward compatibility
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3558
3559
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input returns $fallback; "UTF-8" and names listed in
   * self::$ICONV_ENCODING are returned unchanged; everything else is upper-cased,
   * stripped of all characters except letters, digits and whitespace, and mapped
   * via an alias table. Unknown names are returned upper-cased. Results are memoized
   * per original input.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc. ($fallback if $encoding is empty)</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    // memoized results, keyed by the original (non-normalized) input
    static $RESULT_CACHE = array();

    // alias (upper-case, punctuation stripped) => canonical name
    static $ALIASES = array(
        // ISO-8859-1: Western European
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        // ISO-8859-2: Central European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2',
        // ISO-8859-3: Southern European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3',
        // ISO-8859-4: Northern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4',
        // ISO-8859-5: Cyrillic
        'ISO88595'    => 'ISO-8859-5',
        // ISO-8859-6: Arabic
        'ISO88596'    => 'ISO-8859-6',
        // ISO-8859-7: Greek
        'ISO88597'    => 'ISO-8859-7',
        // ISO-8859-8: Hebrew
        'ISO88598'    => 'ISO-8859-8',
        // ISO-8859-9: Turkish
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9',
        // ISO-8859-11: Thai
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11',
        // ISO-8859-10: Nordic
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10',
        // ISO-8859-13: Baltic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13',
        // ISO-8859-14: Celtic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14',
        // ISO-8859-15: Western European (with some extra chars e.g. €)
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15',
        // ISO-8859-16: Southeast European
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16',
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // raw / binary data
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!$encoding) {
      return $fallback;
    }

    // fast paths: names that are already canonical
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($RESULT_CACHE[$encoding])) {
      return $RESULT_CACHE[$encoding];
    }

    $upper = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upper);

    // unknown names fall through as their upper-cased form
    $normalized = isset($ALIASES[$lookupKey]) ? $ALIASES[$lookupKey] : $upper;

    $RESULT_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3665
3666
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search/replace lists, built once from self::$UTF8_MSWORD: array(0 => keys, 1 => values)
    static $MSWORD_TABLES = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($MSWORD_TABLES === null) {
      $MSWORD_TABLES = array(
          array_keys(self::$UTF8_MSWORD),
          array_values(self::$UTF8_MSWORD),
      );
    }

    return str_replace($MSWORD_TABLES[0], $MSWORD_TABLES[1], $str);
  }
3691
3692
  /**
   * Normalize the whitespace.
   *
   * Every character of self::$WHITESPACE_TABLE is replaced by a plain space; unless
   * requested otherwise, the characters of self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache slot must be chosen by the very same strict check that decides the table
    // content. Deriving it from a loose (int) cast let a truthy non-bool argument (e.g. 1)
    // store the full table (incl. NO-BREAK SPACE) in the slot later used for "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3737
3738
  /**
   * Strip all whitespace characters, i.e. everything matched by the
   * "[[:space:]]" class in Unicode mode (tabs, newlines, spaces, ...).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without any whitespace.</p>
   */
  public static function strip_whitespace($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on failure, the cast turns that into an empty string
    return (string)$stripped;
  }
3757
3758
  /**
   * Format a number with grouped thousands.
   *
   * Native "number_format()" only supports multi-byte separators since PHP 5.4.
   * On older versions the number is formatted with ASCII separators first and the
   * separators are swapped afterwards.
   *
   * @param float  $number        <p>The number being formatted.</p>
   * @param int    $decimals      [optional] <p>Number of decimal digits.</p>
   * @param string $dec_point     [optional] <p>Separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>Thousands separator.</p>
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // The workaround is needed if ANY separator is longer than one byte, and only on
    // PHP < 5.4. (It used to run on PHP >= 5.4 and only if BOTH separators were multi-byte.)
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() swaps both separators in a single pass; sequential replacing would
      // re-replace a "," that is part of the freshly inserted decimal point.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3796
3797
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 on invalid UTF-8 byte sequence.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // memoized results, keyed by "original input" + "normalized encoding name"
    static $ORD_CACHE = array();

    $encoding = (string)$encoding;

    // the cache key is built from the input as it was passed in
    $input = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // still not UTF-8 after normalizing the name: convert the character itself
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $key = $input . $encoding;
    if (isset($ORD_CACHE[$key]) === true) {
      return $ORD_CACHE[$key];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // prefer the intl implementation; a falsy result falls through to the manual decoder
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        $ORD_CACHE[$key] = $intlCode;

        return $intlCode;
      }
    }

    // manual decoding of (at most) the first four bytes; unpack() indexes from 1
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing to decode)
      $code = $lead;
    }

    $ORD_CACHE[$key] = $code;

    return $code;
  }
3863
3864
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $parsed = \mb_parse_str($str, $result);

    // success means: the parser did not report a failure AND something was extracted
    return $parsed !== false && !empty($result);
  }
3892
3893
  /**
   * Checks if the "/u" pattern modifier, which enables Unicode support in PCRE, is available.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // An empty pattern in "/u" mode matches the empty string (result 1) when the modifier
    // is usable; otherwise preg_match() fails, and the warning is silenced on purpose.
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3903
3904
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits => code point,
   * hexadecimal digits => converted via "UTF8::hex_to_int()", anything else => code point
   * of the given character via "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range, or an empty array if a boundary is
   *               empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start and end (in that order) with the very same rules
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $chars = array();
    foreach (range($bounds[0], $bounds[1]) as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return $chars;
  }
3950
3951
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are turned into hexadecimal html entities, so that the
    // entity decoding below can handle them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    // ENT_HTML5 is only used on PHP >= 5.4
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    // decode until nothing changes anymore (or only once, if multi-decoding is off)
    do {
      $before = $str;

      $utf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entitiesDecoded));

    } while ($multi_decode === true && $before !== $str);

    return (string)$str;
  }
4001
4002
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    // pure delegation, kept only for backward compatibility
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4017
4018
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * All entries of self::$BOM (BOM bytes => byte length) are checked one after another
   * against the current start of the string; every one that matches is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // byte-wise comparison: only a BOM at offset 0 counts
      if (0 !== self::strpos($str, $bomString, 0, '8BIT')) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
4045
4046
  /**
   * Removes duplicate occurrences of a string in another string, i.e. every run of
   * directly repeated needles is collapsed into a single needle.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {

        // an empty needle can't be duplicated
        if ((string)$item === '') {
          continue;
        }

        // The captured needle is re-inserted via "${1}" instead of using the needle itself
        // as replacement: a needle containing "$0", "$1" or "\1" would otherwise be
        // expanded as a back-reference and corrupt the result.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '${1}', $str);
      }
    }

    return $str;
  }
4069
4070
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Removed are all control characters except newline (dec 10), carriage return (dec 13)
   * and horizontal tab (dec 09), plus DEL (dec 127), and optionally their url-encoded
   * forms ("%00", "%0B", "%7f", ...) in upper- or lower-case hex notation.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url-encoded forms.</p>
   * @param string $replacement [optional] <p>Replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    if ($url_encoded) {
      // Hex digits in url-encoding are case-insensitive ("%0b" === "%0B"), so the patterns
      // must be too, otherwise the upper-case notation bypasses the filter.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is left: removing a sequence can create a new one
    // out of the surrounding characters (e.g. "%%000" => "%0").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count > 0);

    return $str;
  }
4103
4104
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is passed to mbstring as its "none" setting
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // re-encode UTF-8 => UTF-8 with a temporary substitute character,
      // then restore the previous mbstring setting
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($search, $replace, $str);
  }
4149
4150
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped. If omitted (or empty),
   *                      Unicode separators and control/other characters (\pZ, \pC) are stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is falsy in PHP, but it is a valid one-character list (e.g. stripping trailing zeros),
    // so it must not fall back to the default whitespace handling.
    $useDefaultChars = $chars === INF || (!$chars && $chars !== '0');

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars === true) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4173
4174
  /**
   * Build a regex fragment that matches any of the given characters.
   *
   * The result is a character class ("[...]"); chunks from "UTF8::str_split()" that are
   * three or more bytes long and not a single character become alternatives in a
   * non-capturing group instead. Results are memoized per ($s . $class).
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>The regex fragment (without delimiters).</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $PATTERN_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($PATTERN_CACHE[$cacheKey])) {
      return $PATTERN_CACHE[$cacheKey];
    }

    // index 0 collects the character-class content, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a hyphen goes to the very front of the class, where it is taken literally
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // at most two bytes: escape regex meta characters
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // one single multi-byte character
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $PATTERN_CACHE[$cacheKey] = $pattern;

    return $pattern;
  }
4222
4223
  /**
   * WARNING: Prints the collected UTF-8 support information (libs), e.g. for debugging.
   *
   * Every value of self::$SUPPORT is written to the output, each followed by "\n<br>".
   */
  public static function showSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $output = '';
    foreach (self::$SUPPORT as $supportValue) {
      $output .= $supportValue . "\n<br>";
    }

    echo $output;
  }
4236
4237
  /**
   * Encode a single UTF-8 character as HTML numbered entity, e.g. "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII chars unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ('' === $char) {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encodings need to be normalized
    $targetEncoding = $encoding;
    if ($targetEncoding !== 'UTF-8') {
      $targetEncoding = self::normalize_encoding($targetEncoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $targetEncoding);

    return '&#' . $codePoint . ';';
  }
4268
4269
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a regular expression, otherwise the bytes are
   * walked manually and grouped into 1- to 4-byte sequences; bytes that do not form such a sequence
   * are skipped by the manual fallback.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: group the bytes by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the byte length is needed, even if strlen() is overloaded by mbstring
      $byteCount = (self::$SUPPORT['mbstring_func_overload'] === true)
          ? \mb_strlen($str, '8BIT')
          : strlen($str);

      for ($pos = 0; $pos < $byteCount; ++$pos) {
        $lead = $str[$pos];

        // 0xxxxxxx: single byte
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;

          continue;
        }

        // 110xxxxx 10xxxxxx
        if (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];
            ++$pos;
          }

          continue;
        }

        // 1110xxxx 10xxxxxx 10xxxxxx
        if (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];
            $pos += 2;
          }

          continue;
        }

        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        if (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];
            $pos += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // glue the single characters together into chunks of "$length" characters
      return array_map(
          function ($chunk) {
            return implode('', $chunk);
          },
          array_chunk($chars, $length)
      );
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4392
4393
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order and the first hit wins:
   * 1. binary input: UTF-16 / UTF-32 (via self::is_utf16() / self::is_utf32())
   * 2. ASCII
   * 3. UTF-8
   * 4. "\mb_detect_encoding()" in strict mode with a fixed list of single- and multi-byte encodings
   * 5. an "iconv()" round-trip for every encoding in self::$ICONV_ENCODING
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // run each detection only once, the result is needed for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      // only checked if the string is not UTF-16
      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding
    // into the same encoding does not change it.

    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4493
4494
  /**
   * Check if the string ends with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // cut off as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $tail === $needle;
  }
4522
4523
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // cut off as many characters from the end as the needle is long
    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // substr() can fail; "false" must not be compared as if it were a string
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4546
4547
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * INFO: The search terms and the replacements are handled as plain text, regex meta characters
   *       and back-reference-like sequences ("$1", "\1", "${1}") have no special meaning.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), a string or an array of strings.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Turn every search term into a case-insensitive UTF-8 pattern.
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // an empty needle must not match anything, this pattern can never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() interprets "$n", "${n}" and "\n" in the replacement as
    // back-references, so "$" and "\" have to be escaped to keep them literal.
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');

    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $key => $value) {
        $replacements[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replacements = strtr((string)$replace, $escapeMap);
    }

    return preg_replace($patterns, $replacements, $subject, -1, $count);
  }
4590
4591
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle has to be found at the very first position
    return self::stripos($haystack, $needle) === 0;
  }
4614
4615
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * Strings that are not longer than "$length" are returned unchanged. Otherwise the string is cut
   * at "$length" characters, an incomplete last word is removed and "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters to keep.</p>
   * @param string $strAddOn <p>The string that is appended to a shortened string.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ('' === $str) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit is directly behind a word: only drop the space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // remove the last (maybe incomplete) word
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // there is only one word, so it has to be cut
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4655
4656
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if "$pad_length" is not a positive integer, if it is smaller
   * than the length of the input or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with, this also prevents a division by zero below
    if (!$ps_length) {
      return $str;
    }

    // number of characters that are missing
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // with an odd difference the right side gets the additional character
        $leftLength = (int)floor($diff / 2);
        $rightLength = $diff - $leftLength;

        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, $leftLength);
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, $rightLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4710
4711
  /**
   * Repeat a string.
   *
   * The input is passed through self::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4735
4736
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // "$count" is handed over by reference, so the native function fills it for the caller
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4769
4770
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to search for.</p>
   * @param string $replace <p>The replacement for the first occurrence.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject, unchanged if the search term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstPosition = self::strpos($subject, $search);

    if ($firstPosition === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstPosition, $searchLength);
  }
4789
4790
  /**
   * Shuffles all the characters in the string.
   *
   * The string is split via self::split(), so multi-byte characters stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4805
4806
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicate values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4831
4832
  /**
   * Split a string into an array of grapheme clusters.
   *
   * The string is split with the help of self::GRAPHEME_CLUSTER_RX. For a "$len" smaller than 1
   * the call is forwarded to the native "str_split()", which handles the invalid length.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>The number of grapheme clusters per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ('' === $str) {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    // glue "$len" clusters together for every element of the result
    $arrayOutput = array();
    foreach (array_chunk($graphemes, $len) as $group) {
      $arrayOutput[] = implode('', $group);
    }

    return $arrayOutput;
  }
4876
4877
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ('' === $haystack || '' === $needle) {
      return false;
    }

    // the needle has to be found at the very first position
    return self::strpos($haystack, $needle) === 0;
  }
4900
4901
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number that is written in base 2,
   * so there are no leading zeros; an empty string or a string of NUL bytes results in "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1" characters.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ('' === $str) {
      return '0';
    }

    // Convert byte by byte: "base_convert()" works with floats internally
    // and silently loses precision for inputs longer than a few bytes.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4916
4917
  /**
   * Convert a string into an array of words.
   *
   * The string is split with "PREG_SPLIT_DELIM_CAPTURE", so the result contains the words
   * as well as the text between the words, unless these parts are filtered out.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>Remove values that are not longer than this number of characters.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ('' === $str) {
      return $removeEmptyValues === true ? array() : array('');
    }

    // letters plus the additional chars from "$charList"
    $wordChar = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$wordChar}+(?:[\p{Pd}’']{$wordChar}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter is active
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && trim($part) === '';
      if ($isBlank) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4978
4979
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Forwarded to "UTF8::to_ascii()".</p>
   * @param bool   $strict  <p>Forwarded to "UTF8::to_ascii()".</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4994
4995
  /**
   * Counts number of words in the UTF-8 string.
   *
   * The string is split via "UTF8::str_to_words()": the words are the elements with an odd index,
   * the elements with an even index contain the text between the words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();

      // the first word starts behind the leading non-word part
      $offset = self::strlen($parts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$offset] = $parts[$index];

        // skip the word itself and the text that follows it
        $offset += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
5038
5039
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are case-folded
   *       via UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5057
5058
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack      <p>Forwarded to "UTF8::strstr()".</p>
   * @param string  $needle        <p>Forwarded to "UTF8::strstr()".</p>
   * @param bool    $before_needle <p>Forwarded to "UTF8::strstr()".</p>
   * @param string  $encoding      <p>Forwarded to "UTF8::strstr()".</p>
   * @param boolean $cleanUtf8     <p>Forwarded to "UTF8::strstr()".</p>
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5075
5076
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are not byte-identical are compared in their decomposed normalization form (NFD).
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings do not need to be normalized
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5096
5097
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   [optional] <p>The start position, handed over to "UTF8::substr()".</p>
   * @param int    $length   [optional] <p>The length of the examined part, handed over to "UTF8::substr()".</p>
   *
   * @return int|null <p>
   *                  The number of characters in front of the first character from "$charList",<br>
   *                  <strong>null</strong> if "$charList" or the examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = (string)$charList;
    if ('' === $charList) {
      return null;
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ('' === $str) {
      return null;
    }

    // capture everything (non-greedy) in front of the first char from the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';

    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no char from the mask was found
    return self::strlen($str);
  }
5133
5134
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack      <p>Forwarded to "UTF8::stristr()".</p>
   * @param string  $needle        <p>Forwarded to "UTF8::stristr()".</p>
   * @param bool    $before_needle <p>Forwarded to "UTF8::stristr()".</p>
   * @param string  $encoding      <p>Forwarded to "UTF8::stristr()".</p>
   * @param boolean $cleanUtf8     <p>Forwarded to "UTF8::stristr()".</p>
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5151
5152
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * Every element is converted via "UTF8::chr()" and the results are concatenated in array order.
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5174
5175
  /**
   * Checks whether the string begins with one of the known "BOM" (Byte Order Mark) sequences.
   *
   * The candidates are the keys of self::$BOM; the values of that table are not used here.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if one of the keys of self::$BOM is found at position 0,
   *              <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    $bomStrings = array_keys(self::$BOM);

    foreach ($bomStrings as $bomString) {
      // position 0 (and not merely "found somewhere") means the string starts with this BOM
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5192
5193
  /**
   * Strip HTML and PHP tags from a string, optionally running UTF8::clean() on it first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped; passed straight to the native
   *                                strip_tags() function.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>If exactly <strong>true</strong>, the string is passed through
   *                                UTF8::clean() before the tags are stripped.</p>
   *
   * @return string <p>The stripped string, or an empty string for empty input.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
5227
5228
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * For "UTF-8" the search is delegated to grapheme_stripos() when the intl support flag is set and
   * Bootup::is_php('5.4') reports true; in every other case mb_stripos() is used.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset. A boolean is treated as "UTF-8"
   *                           (fallback for old versions).</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found or if haystack / needle is empty.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      if (is_bool($encoding)) {
        // INFO: the "bool"-check is only a fallback for old versions
        $encoding = 'UTF-8';
      } else {
        $encoding = self::normalize_encoding($encoding, 'UTF-8');
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;

    if (!$useIntl) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
5288
5289
  /**
   * Returns all of haystack starting from and including the first case-insensitive occurrence of needle to the end.
   *
   * Backend order: mb_stristr() when mbstring is available, otherwise grapheme_stristr() (UTF-8 + intl only),
   * otherwise the native stristr() for pure ASCII input, otherwise a regular-expression based fallback.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string <p>A sub-string,<br>or <strong>false</strong> if needle is not found or if
   *                      haystack / needle is empty. If the needle only becomes empty through the
   *                      clean-up step, the (cleaned) haystack is returned.</p>
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Compare against the empty string explicitly: a loose "!$needle" is also true for the
    // perfectly valid needle "0" and would hand back the whole haystack instead of searching for it.
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first (case-insensitive) match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
5370
5371
  /**
   * Get the string length, not the byte-length!
   *
   * For the encodings "ASCII", "CP850" and "8BIT" the byte count is returned. Otherwise the first available
   * backend is used: mb_strlen(), iconv_strlen(), grapheme_strlen() (UTF-8 + intl only), the native strlen()
   * for pure ASCII input and finally a regular-expression based count.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset. A boolean is treated as "UTF-8"
   *                           (fallback for old versions).</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte-oriented encodings are answered right away (loose comparison on purpose, same as a "switch").
    if ($encoding == 'ASCII' || $encoding == 'CP850' || $encoding == '8BIT') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return strlen($str);
      }

      return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        return \iconv_strlen($str, $encoding);
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5478
5479
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared with UTF8::strnatcmp().
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5495
5496
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed through
   * UTF8::strtonatfold() and compared with the native strnatcmp().
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5514
5515
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared with UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5532
5533
  /**
   * String comparison of the first n characters.
   *
   * Each string is cut with UTF8::substr($str, 0, $len) and the two prefixes are compared
   * with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // the cast turns a failed substr() into an empty prefix
    $prefix1 = (string)self::substr($str1, 0, $len);
    $prefix2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
5553
5554
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false <p>The part of haystack starting at the first character found,<br>
   *                      or <strong>false</strong> if nothing is found or if haystack / char_list is empty.</p>
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // locate the matched text again to get the offset to cut from
    $start = strpos($haystack, $m[0]);

    return substr($haystack, $start);
  }
5579
5580
  /**
   * Find position of first occurrence of string in a string.
   *
   * Backend order: the native strpos() for "CP850" (without mbstring function overloading), iconv_strpos() for
   * non-UTF-8 encodings when only iconv is available, mb_strpos(), grapheme_strpos() (UTF-8 + intl only),
   * iconv_strpos(), the native strpos() for pure ASCII input and finally a vanilla PHP fallback.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset. A boolean is treated as "UTF-8"
   *                              (fallback for old versions).</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a non-negative integer is converted
    // via UTF8::chr() first. This must happen before the string cast below, otherwise the
    // check can never match (same order as in UTF8::strrpos() / UTF8::strripos()).
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // Warn only when neither extension is available: with iconv alone the call below can still
    // handle the encoding (same condition as in UTF8::strlen()).
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the string. Convert it into the equivalent
    // absolute offset (clamped to 0), so that the returned position refers to the whole
    // haystack and not only to the tail that is searched.
    if ($offset < 0) {
      $offset += self::strlen($haystack);
      if ($offset < 0) {
        $offset = 0;
      }
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte position inside the tail ...
    $pos = strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted back by the skipped characters
    return $offset + self::strlen(substr($haystackTmp, 0, $pos));
  }
5725
5726
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr() that normalizes the encoding name and can clean both strings first.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Character encoding name to use. Anything other than "UTF-8" is passed
   *                              through UTF8::normalize_encoding() first.
   *                              </p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5765
5766
  /**
   * Reverses characters order in the string.
   *
   * The string is broken up with UTF8::split(), the resulting pieces are reversed and glued together again.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence ('' for empty input)
   */
  public static function strrev($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5783
5784
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr() that normalizes the encoding name and can clean both strings first.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>
   *                               Character encoding name to use. Anything other than "UTF-8" is passed
   *                               through UTF8::normalize_encoding() first.
   *                               </p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5822
5823
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Backend order: mb_strripos() when mbstring is available, otherwise grapheme_strripos() (UTF-8 + intl only),
   * otherwise UTF8::strrpos() on the upper-cased haystack and needle.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative integer is converted via
   *                              UTF8::chr() first.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset. A boolean is treated as "UTF-8"
   *                              (fallback for old versions).</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // has to run before the string cast below, afterwards the needle is never an integer
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $mustClean = ($cleanUtf8 === true || $encoding === true);
    if ($mustClean) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased variants case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5903
5904
  /**
   * Locate the last occurrence of a string inside another string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string that is searched.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is treated as a code point and
   *                              converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Positive: start searching that many characters into the string.
   *                              Negative: stop searching that many characters before the end of the string.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>
   *                   <strong>false</strong> if needle is not found or one of the strings is empty.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is a code point, turn it into the matching character first
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: a boolean "$encoding" is only accepted as a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: cut the haystack according to the offset, then search byte-wise

    if ($offset !== 0) {
      if ($offset > 0) {
        $cut = self::substr($haystack, $offset);
      } else {
        $cut = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      if ($cut === false) {
        $haystack = '';
      } elseif ($cut !== null) {
        $haystack = (string)$cut;
      }
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // translate the byte position into a character position
    return $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
6006
6007
  /**
   * Get the length of the leading part of a string that consists only of characters from the given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Passed to UTF8::substr() to cut the input before matching.</p>
   * @param int    $length [optional] <p>Passed to UTF8::substr() to cut the input before matching.</p>
   *
   * @return int <p>Length of the matching leading segment, 0 if nothing matches or an input is empty.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    } else {
      $str = (string)$str;
    }

    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
6035
6036
  /**
   * Get the part of the haystack that starts at the first occurrence of the needle.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle (excluding the needle) is returned instead.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      or one of the strings is empty.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would lead to wrong positions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
6108
6109
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded string, passed through UTF8::strtolower() as last step.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // lookup tables are built once per request and kept in static locals
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 is used as search list, index 1 as replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6160
6161
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // language specific lower-casing is done via the intl transliterator only
    if (self::$SUPPORT['intl'] !== true || Bootup::is_php('5.4') !== true) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language, use the generic transliterator instead
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6218
6219
  /**
   * Generic case sensitive transformation for collation matching:
   * the string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6231
6232
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is handed to mbstring / intl
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // language specific upper-casing is only available via the intl transliterator
      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // unknown language, use the generic transliterator instead
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning names this method, so the caller can locate the failing call
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6288
6289
  /**
   * Translate characters or replace sub-strings.
   *
   * <p>
   * Called with <i>$from</i> and <i>$to</i>: strings are split into single characters, arrays are used
   * element by element; the longer list is truncated to the length of the shorter one and every
   * "from"-element is replaced by the "to"-element at the same position.<br>
   * Called without <i>$to</i>: an array <i>$from</i> is used as "search => replace" pairs,
   * a string <i>$from</i> is removed from <i>$str</i>.
   * </p>
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // only strings are split into characters, arrays already are lists of search / replace items
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // nothing to translate; also avoids "array_combine()" returning false for empty arrays on PHP < 5.4
      if ($countFrom === 0 || $countTo === 0) {
        return $str;
      }

      // both lists must have the same size for "array_combine()"
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    // only reachable without "$to": a plain string is removed from the input
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6336
6337
  /**
   * Get the width of a string, as reported by "mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = $encoding;
    if ($charset !== 'UTF-8') {
      $charset = self::normalize_encoding($charset, 'UTF-8');
    }

    // iconv and mbstring are not tolerant to invalid encoding
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($input, $charset);
  }
6361
6362
  /**
   * Change the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                  or <strong>CASE_LOWER</strong> (default); every other value is handled
   *                  like <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // everything that is not CASE_LOWER ends up as upper-casing
    $toLower = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $oldKey => $value) {
      $newKey = $toLower ? self::strtolower($oldKey) : self::strtoupper($oldKey);
      $result[$newKey] = $value;
    }

    return $result;
  }
6399
6400
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>An empty string is returned for an empty <i>str</i>
   *                      or a <i>length</i> of 0.</p><p>If <i>offset</i> is greater than the length of
   *                      <i>str</i>, <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // empty input or nothing requested
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // whole string requested
    if (!$offset && $length === null) {
      return $str;
    }

    // the character count is only needed for the offset check and as default length
    $charCount = 0;
    if ($offset || $length === null) {
      $charCount = (int)self::strlen($str, $encoding);
    }

    // offset behind the end of the string
    if ($offset && $offset > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "CP850" is cut byte-wise, as long as "substr()" isn't overloaded by mbstring
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php: split into characters, take the requested slice and glue it together again
    $chars = self::split($str);

    return implode('', array_slice($chars, $offset, $length));
  }
6522
6523
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison, passed to
   *                                    UTF8::substr() for <i>str1</i>.</p>
   * @param int     $length             [optional] <p>The length of the comparison, passed to UTF8::substr()
   *                                    for <i>str1</i>; <i>str2</i> is cut to the length of the result.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    if ($offset !== 0 || $length !== null) {
      // cut the main string first ...
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : (string)$mainPart;

      // ... and compare only as many characters of the second string
      $otherPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($otherPart === false) ? '' : (string)$otherPart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6567
6568
  /**
   * Count how often a substring occurs in a string.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring.
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // restrict the haystack to the requested window
    if ($offset || $length !== null) {

      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      // output from "substr_count()" have changed in PHP 7.1
      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid characters from both strings before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match-set is one occurrence
    $occurrences = array();
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return count($occurrences);
  }
6657
6658
  /**
   * Strip a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it doesn't start with it.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to remove
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6690
6691
  /**
   * Strip a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it doesn't end with it.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to remove
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keep = self::strlen($haystack) - self::strlen($needle);
    $rest = self::substr($haystack, 0, $keep);

    return ($rest === false) ? '' : (string)$rest;
  }
6723
6724
  /**
   * Strip a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it doesn't start with it.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to remove
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6756
6757
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * <p>For an array of strings, $replacement, $offset and $length are expanded to one entry per string and the
   * method is applied to every element. In that array mode a missing $length is treated as 0 (insert), while for a
   * single string a missing $length means "up to the end of the string".</p>
   *
   * @param string|string[] $str         <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset      <p>
   *                                     If positive, replacing starts at that character offset.
   *                                     <br><br>
   *                                     If negative, replacing starts that many characters from the end of the string.
   *                                     </p>
   * @param int|int[]|null  $length      [optional] <p>If positive, the number of characters to replace. If negative,
   *                                     the number of characters from the end of the string at which to stop
   *                                     replacing. If zero, the replacement is inserted at the given offset.</p>
   *
   * @return string|string[] <p>The result string; an array is returned if $str is an array.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $count = count($str);

      // one replacement per input string
      $replacement = is_array($replacement) === true
          ? array_slice($replacement, 0, $count)
          : array_pad(array($replacement), $count, $replacement);

      // one offset per input string, non-integer entries fall back to 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $count);
        foreach ($offset as $key => $value) {
          if ((int)$value !== $value) {
            $offset[$key] = 0;
          }
        }
      } else {
        $offset = array_pad(array($offset), $count, $offset);
      }

      // one length per input string
      if ($length === null) {
        $length = array_fill(0, $count, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $count);
        foreach ($length as $key => $value) {
          if ($value === null) {
            $length[$key] = 0;
          } elseif ((int)$value !== $value) {
            // non-integer entries fall back to the number of input strings
            $length[$key] = $count;
          }
        }
      } else {
        $length = array_pad(array($length), $count, $length);
      }

      // recursive call, element by element
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // single string: only the first replacement of an array is used
    if (is_array($replacement) === true) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // for ASCII input the byte offsets of the native function are character offsets
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the replacement in
    preg_match_all('/./us', $str, $strChars);
    preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return implode('', $strChars[0]);
  }
6858
6859
  /**
   * Strips a suffix ($needle) from the end of the string ($haystack).
   *
   * <p>The haystack is returned unchanged if the needle is empty or if the haystack does not end with it.</p>
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remaining = self::substr($haystack, 0, $keepLength);

    return $remaining === false ? '' : (string)$remaining;
  }
6890
6891
  /**
   * Returns the string with the case of every character swapped.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is matched character by character
      $str = self::clean($str);
    }

    // a character that equals its own upper-case form is lower-cased, everything else is upper-cased
    $swapChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = self::strtoupper($char, $encoding);

      return $char === $upper ? self::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapChar, $str);
  }
6934
6935
  /**
   * Deprecated alias for "UTF8::to_ascii()"; all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "unknown" replacement character.</p>
   * @param bool   $strict    <p>Passed on as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6952
6953
  /**
   * Deprecated alias for "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string $str <p>The input; passed through unchanged.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6968
6969
  /**
   * Deprecated alias for "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input; passed through unchanged.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6984
6985
  /**
   * Deprecated alias for "UTF8::to_utf8()" (html-entity decoding stays at its default).
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input; passed through unchanged.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
7000
7001
  /**
   * Convert a string into ASCII.
   *
   * <p>Pure ASCII input is returned as it is. Everything else is passed through "UTF8::clean()" and then every
   * remaining non-ASCII character is looked up in the transliteration tables loaded via "UTF8::getData()";
   * characters without a table entry are replaced by $unknown.</p>
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Try "transliterator_transliterate()" from PHP-Intl first | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // transliteration tables, kept across calls and indexed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: in that case keep the
        // current string and fall through to the table based conversion instead of losing the input
        if (is_string($transliterated) === true) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // reset for every character, otherwise a lead byte that matches no branch below
      // would silently reuse the code point of the previous character
      $ord = null;

      // decode the code point from the lead byte and its continuation bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ordC1 = ord($c[1]);
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ordC1 = ord($c[1]);
        $ordC2 = ord($c[2]);
        $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ordC1 = ord($c[1]);
        $ordC2 = ord($c[2]);
        $ordC3 = ord($c[3]);
        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // legacy 5 byte form
        $ordC1 = ord($c[1]);
        $ordC2 = ord($c[2]);
        $ordC3 = ord($c[3]);
        $ordC4 = ord($c[4]);
        $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
      } elseif ($ordC0 === 252 || $ordC0 === 253) {
        // legacy 6 byte form
        $ordC1 = ord($c[1]);
        $ordC2 = ord($c[2]);
        $ordC3 = ord($c[3]);
        $ordC4 = ord($c[4]);
        $ordC5 = ord($c[5]);
        $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
      }

      // no valid lead byte (128-191, 254, 255): nothing to look up
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // load the table for this bank on first use; a missing table is cached as an empty array
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
7161
7162
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * <p>Arrays are converted element by element (recursively), the keys are kept.</p>
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    return self::utf8_decode($str);
  }
7191
7192
  /**
   * Alias for "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input; passed through unchanged.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7205
7206
  /**
   * Converts almost everything that is not UTF-8 into UTF-8, while leaving existing UTF-8 sequences alone.
   *
   * <ul>
   * <li>Unicode escape sequences ("\uXXXX") are decoded, html-entities only on request.</li>
   * <li>Bytes that are not part of a well-formed looking UTF-8 sequence are treated as WINDOWS-1252 or
   * ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element, the keys are kept
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // the byte length is needed here, even if "strlen()" is overloaded by mbstring
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    $result = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($pos = 0; $pos < $byteCount; $pos++) {
      $byte = $str[$pos];

      if ($byte < "\xC0") {
        if (($byte & "\xC0") === "\x80") {
          // a continuation byte without a lead byte - needs conversion
          $result .= self::to_utf8_convert($byte);
        } else {
          // it doesn't need conversion
          $result .= $byte;
        }

        continue;
      }

      // number of continuation bytes the lead byte announces (0 = doesn't look like UTF-8 at all)
      if ($byte <= "\xDF") {
        $expected = 1;
      } elseif ($byte <= "\xEF") {
        $expected = 2;
      } elseif ($byte <= "\xF7") {
        $expected = 3;
      } else {
        $expected = 0;
      }

      // collect the sequence as long as every following byte is a continuation byte (0x80-0xBF),
      // bytes behind the end of the string count as "\x00"
      $sequence = $byte;
      $looksLikeUtf8 = $expected > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $expected; $k++) {
        $next = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already - copy it and skip the continuation bytes
        $result .= $sequence;
        $pos += $expected;
      } else {
        // not valid UTF8 - convert only the lead byte
        $result .= self::to_utf8_convert($byte);
      }
    }

    // decode unicode escape sequences
    $result = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
7323
7324
  /**
   * Converts one non-UTF-8 byte into its UTF-8 representation.
   *
   * <p>Bytes listed in the Windows-1252 table are mapped via that table, every other byte is encoded as a
   * 2-byte sequence.</p>
   *
   * @param string $int <p>A single byte (despite the name it is used as a string: "ord()" and string bit
   *                    operations are applied to it).</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int)
  {
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return (string)self::$WIN1252_TO_UTF8[$byteValue];
    }

    // 2-byte form: the upper bits go into the lead byte, the lower 6 bits into the continuation byte
    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $trailByte = ($int & "\x3F") | "\x80";

    return $leadByte . $trailByte;
  }
7344
7345
  /**
   * Strip whitespace or other characters from the beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for 7-bit (ASCII) input, but checking for ASCII
   * costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $useDefaultChars = ($chars === INF || !$chars);

    // explicit character list: delegate to the one-sided variants
    if ($useDefaultChars === false) {
      $leftTrimmed = self::ltrim($str, $chars);

      return self::rtrim($leftTrimmed, $chars);
    }

    // default: strip unicode separators (\pZ) and "other" characters (\pC)
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7373
7374
  /**
   * Upper-cases the first character of the string, the rest is left untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is split
      $str = self::clean($str);
    }

    // everything behind the first character
    $tail = self::substr($str, 1, null, $encoding);
    $tail = $tail === false ? '' : $tail;

    // the first character, upper-cased
    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
7404
7405
  /**
   * Alias for "UTF8::ucfirst()"; all arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $upperCased = self::ucfirst($word, $encoding, $cleanUtf8);

    return $upperCased;
  }
7420
7421
  /**
   * Uppercase the first character of all words in the string.
   *
   * <p>If neither $charlist nor $exceptions are given and the string is pure ASCII, the native "ucwords()" is
   * used. Otherwise the string is split via "UTF8::str_to_words()" and every piece that is not listed in
   * $exceptions is passed through "UTF8::ucfirst()".</p>
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if (!$str) {
      // "0" is falsy in PHP, but it is still real input and must not be turned into ''
      if (!is_numeric($str)) {
        return '';
      }

      $str = (string)$str;
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is split into words
      $str = self::clean($str);
    }

    $usePhpDefaultFunctions = !(bool)($charlist . implode('', $exceptions));

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty pieces only; a piece like "0" is falsy as well, but dropping it
      // would remove the zero from the result
      if (!$word && !is_numeric($word)) {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7489
7490
  /**
   * Decode url-encoded strings and html-entities (repeatedly, by default) & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // turn "%uXXXX" escapes into hexadecimal html-entities, they are decoded in the loop below
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // one pass = to_utf8 -> html_entity_decode -> urldecode -> fix_simple_utf8;
    // with $multi_decode the passes are repeated until the string stops changing
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
7540
7541
  /**
   * Returns a lookup table that maps url-encoded Windows-1252 bytes ("%20" - "%FF") to UTF-8 strings.
   *
   * Bytes that have no printable Windows-1252 character (%7F, %81, %8D, %8F, %90, %9D) map to an empty string.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Upper-case percent-escape as key, UTF-8 string as value.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        // 0x20 - 0x7F: identical to ASCII
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 - 0x9F: the Windows-1252 specific range
        '%80' => '€', // EURO SIGN (was wrongly mapped to a backtick)
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        // 0xA0 - 0xFF: identical to ISO-8859-1
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE, written as bytes because the character is invisible in most editors
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN, written as bytes because the character is invisible in most editors
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7777
7778
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is passed through "UTF8::to_utf8()" first, then every key of self::$UTF8_TO_WIN1252 found in the
   * string is replaced by its mapped value. Afterwards each 2-byte sequence is folded into a single byte and every
   * 3-byte or 4-byte sequence (and every 2-byte sequence above U+00FF) is replaced by "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the key / value lists are built on the first call and reused by later calls
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // the loop below works on bytes, so ask for the byte length explicitly if strlen() is overloaded by mbstring
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    // $i is the read position, $j the write position: the string is compacted in place
    // (the result is never longer than the input, so $j never overtakes $i)
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        // lead byte of a 2-byte sequence
        case "\xC0":
        case "\xD0":
          if (!isset($str[$i + 1])) {
            // truncated sequence at the very end: do not read past the end of the string
            $str[$j] = '?';
            break;
          }

          $c = (ord($str[$i] & "\x1F") << 6) | ord($str[++$i] & "\x3F");
          $str[$j] = $c < 256 ? self::chr_and_parse_int($c) : '?';
          break;

        // lead byte of a 4-byte sequence
        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          ++$i;
          // no break: intentional fall-through, a 4-byte sequence skips one byte more than a 3-byte sequence

        // lead byte of a 3-byte sequence
        case "\xE0":
          $str[$j] = '?';
          $i += 2;
          break;

        // single byte (ASCII, or a byte inserted by the replacement above)
        default:
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes hold the result, the rest is left over from the in-place compaction
    return (string)self::substr($str, 0, $j, '8BIT');
  }
7841
7842
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "utf8_encode()" call, every key of self::$CP1252_TO_UTF8 found in the result is replaced by
   * its mapped value (presumably the Windows-1252 specific characters; the table is defined elsewhere in the class).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $strTmp = \utf8_encode($str);
    if ($strTmp === false) {
      return '';
    }

    $str = (string)$strTmp;

    // utf8_encode() turns the bytes 0x80 - 0xBF into "\xC2" + byte, so without a "\xC2" in the result
    // none of those bytes was present and the replacement below is skipped
    if (false === strpos($str, "\xC2")) {
      return $str;
    }

    // the key / value lists are built on the first call and reused by later calls
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $str);
  }
7878
7879
  /**
   * Fixes utf8-win1252 chars; plain alias that forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    return self::fix_simple_utf8($str);
  }
7892
7893
  /**
   * Returns an array with all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    return self::$WHITESPACE_TABLE;
  }
7909
7910
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string holds more than $limit words, it is cut after
   * the last allowed word, right-trimmed and $strAddOn is appended; otherwise it is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
   *
   * @return string <p>An empty string if the input is empty or $limit is smaller than 1.</p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // init
    $limit = (int)$limit;

    if ($limit < 1) {
      return '';
    }

    // possessive quantifiers: optional leading whitespace, then up to $limit "word + trailing whitespace" groups
    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $matches);

    // the match is anchored at the start, so it is a prefix of $str:
    // if it equals the whole string nothing was cut off and no add-on is needed
    if (
        !isset($matches[0])
        ||
        $str === $matches[0]
    ) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
7946
7947
  /**
   * Wraps a string to a given number of characters
   *
   * The native "wordwrap()" counts bytes, so it is run on a single-byte "shadow" of the input and the break
   * positions it produces are then mapped back onto the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $w is the shadow string: "#" for an existing break, " " for a space, "?" for any other character
    $w = '';
    $strSplit = explode($break, $str);
    $count = count($strSplit);

    // $chars holds the real pieces, one entry per shadow byte
    $chars = array();
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $count; ++$i) {

      if ($i) {
        $chars[] = $break;
        $w .= '#';
      }

      $c = $strSplit[$i];
      unset($strSplit[$i]);

      // self::split() is expected to return one entry per character - TODO confirm against its definition
      foreach (self::split($c) as $char) {
        $chars[] = $char;
        $w .= ' ' === $char ? ' ' : '?';
      }
    }

    $strReturn = '';
    $j = 0;
    $b = $i = -1;
    $w = wordwrap($w, $width, '#', $cut);

    // walk from "#" to "#" in the wrapped shadow ($b), copying the real characters ($j) in between
    while (false !== $b = self::strpos($w, '#', $b + 1)) {
      for (++$i; $i < $b; ++$i) {
        $strReturn .= $chars[$j];
        unset($chars[$j++]);
      }

      // the break replaced an original break or a space: drop that piece instead of copying it
      if ($break === $chars[$j] || ' ' === $chars[$j]) {
        unset($chars[$j++]);
      }

      $strReturn .= $break;
    }

    // whatever is left belongs to the last line
    return $strReturn . implode('', $chars);
  }
8014
8015
  /**
   * Returns an array of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    return self::$WHITESPACE;
  }
8024
8025
}
8026