Completed
Push — master ( 3f0b0b...cb7a01 )
by Lars
10:06
created

UTF8::strip_whitespace()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 5
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 5
cts 5
cp 1
rs 9.4285
c 0
b 0
f 0
cc 2
eloc 5
nc 2
nop 1
crap 2
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
  );
48
49
  /**
50
   * @var array
51
   */
52
  private static $CP1252_TO_UTF8 = array(
53
      '€' => '€',
54
      '‚' => '‚',
55
      'ƒ' => 'ƒ',
56
      '„' => '„',
57
      '…' => '…',
58
      '†' => '†',
59
      '‡' => '‡',
60
      'ˆ' => 'ˆ',
61
      '‰' => '‰',
62
      'Š' => 'Š',
63
      '‹' => '‹',
64
      'Œ' => 'Œ',
65
      'Ž' => 'Ž',
66
      '‘' => '‘',
67
      '’' => '’',
68
      '“' => '“',
69
      '”' => '”',
70
      '•' => '•',
71
      '–' => '–',
72
      '—' => '—',
73
      '˜' => '˜',
74
      '™' => '™',
75
      'š' => 'š',
76
      '›' => '›',
77
      'œ' => 'œ',
78
      'ž' => 'ž',
79
      'Ÿ' => 'Ÿ',
80
  );
81
82
  /**
83
   * Bom => Byte-Length
84
   *
85
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
86
   *
87
   * @var array
88
   */
89
  private static $BOM = array(
90
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
91
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
92
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
93
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
94
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
95
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
96
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
97
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
98
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
99
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
100
  );
101
102
  /**
103
   * Numeric code point => UTF-8 Character
104
   *
105
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
106
   *
107
   * @var array
108
   */
109
  private static $WHITESPACE = array(
110
    // NUL Byte
111
    0     => "\x0",
112
    // Tab
113
    9     => "\x9",
114
    // New Line
115
    10    => "\xa",
116
    // Vertical Tab
117
    11    => "\xb",
118
    // Carriage Return
119
    13    => "\xd",
120
    // Ordinary Space
121
    32    => "\x20",
122
    // NO-BREAK SPACE
123
    160   => "\xc2\xa0",
124
    // OGHAM SPACE MARK
125
    5760  => "\xe1\x9a\x80",
126
    // MONGOLIAN VOWEL SEPARATOR
127
    6158  => "\xe1\xa0\x8e",
128
    // EN QUAD
129
    8192  => "\xe2\x80\x80",
130
    // EM QUAD
131
    8193  => "\xe2\x80\x81",
132
    // EN SPACE
133
    8194  => "\xe2\x80\x82",
134
    // EM SPACE
135
    8195  => "\xe2\x80\x83",
136
    // THREE-PER-EM SPACE
137
    8196  => "\xe2\x80\x84",
138
    // FOUR-PER-EM SPACE
139
    8197  => "\xe2\x80\x85",
140
    // SIX-PER-EM SPACE
141
    8198  => "\xe2\x80\x86",
142
    // FIGURE SPACE
143
    8199  => "\xe2\x80\x87",
144
    // PUNCTUATION SPACE
145
    8200  => "\xe2\x80\x88",
146
    // THIN SPACE
147
    8201  => "\xe2\x80\x89",
148
    //HAIR SPACE
149
    8202  => "\xe2\x80\x8a",
150
    // LINE SEPARATOR
151
    8232  => "\xe2\x80\xa8",
152
    // PARAGRAPH SEPARATOR
153
    8233  => "\xe2\x80\xa9",
154
    // NARROW NO-BREAK SPACE
155
    8239  => "\xe2\x80\xaf",
156
    // MEDIUM MATHEMATICAL SPACE
157
    8287  => "\xe2\x81\x9f",
158
    // IDEOGRAPHIC SPACE
159
    12288 => "\xe3\x80\x80",
160
  );
161
162
  /**
163
   * @var array
164
   */
165
  private static $WHITESPACE_TABLE = array(
166
      'SPACE'                     => "\x20",
167
      'NO-BREAK SPACE'            => "\xc2\xa0",
168
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
169
      'EN QUAD'                   => "\xe2\x80\x80",
170
      'EM QUAD'                   => "\xe2\x80\x81",
171
      'EN SPACE'                  => "\xe2\x80\x82",
172
      'EM SPACE'                  => "\xe2\x80\x83",
173
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
174
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
175
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
176
      'FIGURE SPACE'              => "\xe2\x80\x87",
177
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
178
      'THIN SPACE'                => "\xe2\x80\x89",
179
      'HAIR SPACE'                => "\xe2\x80\x8a",
180
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
181
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
182
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
183
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
184
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
185
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
186
  );
187
188
  /**
189
   * bidirectional text chars
190
   *
191
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
192
   *
193
   * @var array
194
   */
195
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
196
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
197
    8234 => "\xE2\x80\xAA",
198
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
199
    8235 => "\xE2\x80\xAB",
200
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
201
    8236 => "\xE2\x80\xAC",
202
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
203
    8237 => "\xE2\x80\xAD",
204
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
205
    8238 => "\xE2\x80\xAE",
206
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
207
    8294 => "\xE2\x81\xA6",
208
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
209
    8295 => "\xE2\x81\xA7",
210
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
211
    8296 => "\xE2\x81\xA8",
212
    // POP DIRECTIONAL ISOLATE
213
    8297 => "\xE2\x81\xA9",
214
  );
215
216
  /**
217
   * @var array
218
   */
219
  private static $COMMON_CASE_FOLD = array(
220
      'ſ'            => 's',
221
      "\xCD\x85"     => 'ι',
222
      'ς'            => 'σ',
223
      "\xCF\x90"     => 'β',
224
      "\xCF\x91"     => 'θ',
225
      "\xCF\x95"     => 'φ',
226
      "\xCF\x96"     => 'π',
227
      "\xCF\xB0"     => 'κ',
228
      "\xCF\xB1"     => 'ρ',
229
      "\xCF\xB5"     => 'ε',
230
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
231
      "\xE1\xBE\xBE" => 'ι',
232
  );
233
234
  /**
235
   * @var array
236
   */
237
  private static $BROKEN_UTF8_FIX = array(
238
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
239
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
240
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
241
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
242
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
243
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
244
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
245
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
246
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
247
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
248
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
249
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
250
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
251
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
252
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
253
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
254
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
255
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
256
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
257
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
258
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
259
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
260
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
261
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
262
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
263
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
264
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
265
      'ü'       => 'ü',
266
      'ä'       => 'ä',
267
      'ö'       => 'ö',
268
      'Ö'       => 'Ö',
269
      'ß'       => 'ß',
270
      'Ã '       => 'à',
271
      'á'       => 'á',
272
      'â'       => 'â',
273
      'ã'       => 'ã',
274
      'ù'       => 'ù',
275
      'ú'       => 'ú',
276
      'û'       => 'û',
277
      'Ù'       => 'Ù',
278
      'Ú'       => 'Ú',
279
      'Û'       => 'Û',
280
      'Ü'       => 'Ü',
281
      'ò'       => 'ò',
282
      'ó'       => 'ó',
283
      'ô'       => 'ô',
284
      'è'       => 'è',
285
      'é'       => 'é',
286
      'ê'       => 'ê',
287
      'ë'       => 'ë',
288
      'À'       => 'À',
289
      'Á'       => 'Á',
290
      'Â'       => 'Â',
291
      'Ã'       => 'Ã',
292
      'Ä'       => 'Ä',
293
      'Ã…'       => 'Å',
294
      'Ç'       => 'Ç',
295
      'È'       => 'È',
296
      'É'       => 'É',
297
      'Ê'       => 'Ê',
298
      'Ë'       => 'Ë',
299
      'ÃŒ'       => 'Ì',
300
      'Í'       => 'Í',
301
      'ÃŽ'       => 'Î',
302
      'Ï'       => 'Ï',
303
      'Ñ'       => 'Ñ',
304
      'Ã’'       => 'Ò',
305
      'Ó'       => 'Ó',
306
      'Ô'       => 'Ô',
307
      'Õ'       => 'Õ',
308
      'Ø'       => 'Ø',
309
      'Ã¥'       => 'å',
310
      'æ'       => 'æ',
311
      'ç'       => 'ç',
312
      'ì'       => 'ì',
313
      'í'       => 'í',
314
      'î'       => 'î',
315
      'ï'       => 'ï',
316
      'ð'       => 'ð',
317
      'ñ'       => 'ñ',
318
      'õ'       => 'õ',
319
      'ø'       => 'ø',
320
      'ý'       => 'ý',
321
      'ÿ'       => 'ÿ',
322
      '€'      => '€',
323
      '’'      => '’',
324
  );
325
326
  /**
327
   * @var array
328
   */
329
  private static $UTF8_TO_WIN1252 = array(
330
      "\xe2\x82\xac" => "\x80", // EURO SIGN
331
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
332
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
333
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
334
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
335
      "\xe2\x80\xa0" => "\x86", // DAGGER
336
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
337
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
338
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
339
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
340
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
341
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
342
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
343
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
344
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
345
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
346
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
347
      "\xe2\x80\xa2" => "\x95", // BULLET
348
      "\xe2\x80\x93" => "\x96", // EN DASH
349
      "\xe2\x80\x94" => "\x97", // EM DASH
350
      "\xcb\x9c"     => "\x98", // SMALL TILDE
351
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
352
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
353
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
354
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
355
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
356
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
357
  );
358
359
  /**
360
   * @var array
361
   */
362
  private static $UTF8_MSWORD = array(
363
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
364
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
365
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
366
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
367
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
368
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
369
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
370
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
371
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
372
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
373
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
374
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
375
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
376
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
377
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
378
  );
379
380
  /**
381
   * @var array
382
   */
383
  private static $ICONV_ENCODING = array(
384
      'ANSI_X3.4-1968',
385
      'ANSI_X3.4-1986',
386
      'ASCII',
387
      'CP367',
388
      'IBM367',
389
      'ISO-IR-6',
390
      'ISO646-US',
391
      'ISO_646.IRV:1991',
392
      'US',
393
      'US-ASCII',
394
      'CSASCII',
395
      'UTF-8',
396
      'ISO-10646-UCS-2',
397
      'UCS-2',
398
      'CSUNICODE',
399
      'UCS-2BE',
400
      'UNICODE-1-1',
401
      'UNICODEBIG',
402
      'CSUNICODE11',
403
      'UCS-2LE',
404
      'UNICODELITTLE',
405
      'ISO-10646-UCS-4',
406
      'UCS-4',
407
      'CSUCS4',
408
      'UCS-4BE',
409
      'UCS-4LE',
410
      'UTF-16',
411
      'UTF-16BE',
412
      'UTF-16LE',
413
      'UTF-32',
414
      'UTF-32BE',
415
      'UTF-32LE',
416
      'UNICODE-1-1-UTF-7',
417
      'UTF-7',
418
      'CSUNICODE11UTF7',
419
      'UCS-2-INTERNAL',
420
      'UCS-2-SWAPPED',
421
      'UCS-4-INTERNAL',
422
      'UCS-4-SWAPPED',
423
      'C99',
424
      'JAVA',
425
      'CP819',
426
      'IBM819',
427
      'ISO-8859-1',
428
      'ISO-IR-100',
429
      'ISO8859-1',
430
      'ISO_8859-1',
431
      'ISO_8859-1:1987',
432
      'L1',
433
      'LATIN1',
434
      'CSISOLATIN1',
435
      'ISO-8859-2',
436
      'ISO-IR-101',
437
      'ISO8859-2',
438
      'ISO_8859-2',
439
      'ISO_8859-2:1987',
440
      'L2',
441
      'LATIN2',
442
      'CSISOLATIN2',
443
      'ISO-8859-3',
444
      'ISO-IR-109',
445
      'ISO8859-3',
446
      'ISO_8859-3',
447
      'ISO_8859-3:1988',
448
      'L3',
449
      'LATIN3',
450
      'CSISOLATIN3',
451
      'ISO-8859-4',
452
      'ISO-IR-110',
453
      'ISO8859-4',
454
      'ISO_8859-4',
455
      'ISO_8859-4:1988',
456
      'L4',
457
      'LATIN4',
458
      'CSISOLATIN4',
459
      'CYRILLIC',
460
      'ISO-8859-5',
461
      'ISO-IR-144',
462
      'ISO8859-5',
463
      'ISO_8859-5',
464
      'ISO_8859-5:1988',
465
      'CSISOLATINCYRILLIC',
466
      'ARABIC',
467
      'ASMO-708',
468
      'ECMA-114',
469
      'ISO-8859-6',
470
      'ISO-IR-127',
471
      'ISO8859-6',
472
      'ISO_8859-6',
473
      'ISO_8859-6:1987',
474
      'CSISOLATINARABIC',
475
      'ECMA-118',
476
      'ELOT_928',
477
      'GREEK',
478
      'GREEK8',
479
      'ISO-8859-7',
480
      'ISO-IR-126',
481
      'ISO8859-7',
482
      'ISO_8859-7',
483
      'ISO_8859-7:1987',
484
      'ISO_8859-7:2003',
485
      'CSISOLATINGREEK',
486
      'HEBREW',
487
      'ISO-8859-8',
488
      'ISO-IR-138',
489
      'ISO8859-8',
490
      'ISO_8859-8',
491
      'ISO_8859-8:1988',
492
      'CSISOLATINHEBREW',
493
      'ISO-8859-9',
494
      'ISO-IR-148',
495
      'ISO8859-9',
496
      'ISO_8859-9',
497
      'ISO_8859-9:1989',
498
      'L5',
499
      'LATIN5',
500
      'CSISOLATIN5',
501
      'ISO-8859-10',
502
      'ISO-IR-157',
503
      'ISO8859-10',
504
      'ISO_8859-10',
505
      'ISO_8859-10:1992',
506
      'L6',
507
      'LATIN6',
508
      'CSISOLATIN6',
509
      'ISO-8859-11',
510
      'ISO8859-11',
511
      'ISO_8859-11',
512
      'ISO-8859-13',
513
      'ISO-IR-179',
514
      'ISO8859-13',
515
      'ISO_8859-13',
516
      'L7',
517
      'LATIN7',
518
      'ISO-8859-14',
519
      'ISO-CELTIC',
520
      'ISO-IR-199',
521
      'ISO8859-14',
522
      'ISO_8859-14',
523
      'ISO_8859-14:1998',
524
      'L8',
525
      'LATIN8',
526
      'ISO-8859-15',
527
      'ISO-IR-203',
528
      'ISO8859-15',
529
      'ISO_8859-15',
530
      'ISO_8859-15:1998',
531
      'LATIN-9',
532
      'ISO-8859-16',
533
      'ISO-IR-226',
534
      'ISO8859-16',
535
      'ISO_8859-16',
536
      'ISO_8859-16:2001',
537
      'L10',
538
      'LATIN10',
539
      'KOI8-R',
540
      'CSKOI8R',
541
      'KOI8-U',
542
      'KOI8-RU',
543
      'CP1250',
544
      'MS-EE',
545
      'WINDOWS-1250',
546
      'CP1251',
547
      'MS-CYRL',
548
      'WINDOWS-1251',
549
      'CP1252',
550
      'MS-ANSI',
551
      'WINDOWS-1252',
552
      'CP1253',
553
      'MS-GREEK',
554
      'WINDOWS-1253',
555
      'CP1254',
556
      'MS-TURK',
557
      'WINDOWS-1254',
558
      'CP1255',
559
      'MS-HEBR',
560
      'WINDOWS-1255',
561
      'CP1256',
562
      'MS-ARAB',
563
      'WINDOWS-1256',
564
      'CP1257',
565
      'WINBALTRIM',
566
      'WINDOWS-1257',
567
      'CP1258',
568
      'WINDOWS-1258',
569
      '850',
570
      'CP850',
571
      'IBM850',
572
      'CSPC850MULTILINGUAL',
573
      '862',
574
      'CP862',
575
      'IBM862',
576
      'CSPC862LATINHEBREW',
577
      '866',
578
      'CP866',
579
      'IBM866',
580
      'CSIBM866',
581
      'MAC',
582
      'MACINTOSH',
583
      'MACROMAN',
584
      'CSMACINTOSH',
585
      'MACCENTRALEUROPE',
586
      'MACICELAND',
587
      'MACCROATIAN',
588
      'MACROMANIA',
589
      'MACCYRILLIC',
590
      'MACUKRAINE',
591
      'MACGREEK',
592
      'MACTURKISH',
593
      'MACHEBREW',
594
      'MACARABIC',
595
      'MACTHAI',
596
      'HP-ROMAN8',
597
      'R8',
598
      'ROMAN8',
599
      'CSHPROMAN8',
600
      'NEXTSTEP',
601
      'ARMSCII-8',
602
      'GEORGIAN-ACADEMY',
603
      'GEORGIAN-PS',
604
      'KOI8-T',
605
      'CP154',
606
      'CYRILLIC-ASIAN',
607
      'PT154',
608
      'PTCP154',
609
      'CSPTCP154',
610
      'KZ-1048',
611
      'RK1048',
612
      'STRK1048-2002',
613
      'CSKZ1048',
614
      'MULELAO-1',
615
      'CP1133',
616
      'IBM-CP1133',
617
      'ISO-IR-166',
618
      'TIS-620',
619
      'TIS620',
620
      'TIS620-0',
621
      'TIS620.2529-1',
622
      'TIS620.2533-0',
623
      'TIS620.2533-1',
624
      'CP874',
625
      'WINDOWS-874',
626
      'VISCII',
627
      'VISCII1.1-1',
628
      'CSVISCII',
629
      'TCVN',
630
      'TCVN-5712',
631
      'TCVN5712-1',
632
      'TCVN5712-1:1993',
633
      'ISO-IR-14',
634
      'ISO646-JP',
635
      'JIS_C6220-1969-RO',
636
      'JP',
637
      'CSISO14JISC6220RO',
638
      'JISX0201-1976',
639
      'JIS_X0201',
640
      'X0201',
641
      'CSHALFWIDTHKATAKANA',
642
      'ISO-IR-87',
643
      'JIS0208',
644
      'JIS_C6226-1983',
645
      'JIS_X0208',
646
      'JIS_X0208-1983',
647
      'JIS_X0208-1990',
648
      'X0208',
649
      'CSISO87JISX0208',
650
      'ISO-IR-159',
651
      'JIS_X0212',
652
      'JIS_X0212-1990',
653
      'JIS_X0212.1990-0',
654
      'X0212',
655
      'CSISO159JISX02121990',
656
      'CN',
657
      'GB_1988-80',
658
      'ISO-IR-57',
659
      'ISO646-CN',
660
      'CSISO57GB1988',
661
      'CHINESE',
662
      'GB_2312-80',
663
      'ISO-IR-58',
664
      'CSISO58GB231280',
665
      'CN-GB-ISOIR165',
666
      'ISO-IR-165',
667
      'ISO-IR-149',
668
      'KOREAN',
669
      'KSC_5601',
670
      'KS_C_5601-1987',
671
      'KS_C_5601-1989',
672
      'CSKSC56011987',
673
      'EUC-JP',
674
      'EUCJP',
675
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
676
      'CSEUCPKDFMTJAPANESE',
677
      'MS_KANJI',
678
      'SHIFT-JIS',
679
      'SHIFT_JIS',
680
      'SJIS',
681
      'CSSHIFTJIS',
682
      'CP932',
683
      'ISO-2022-JP',
684
      'CSISO2022JP',
685
      'ISO-2022-JP-1',
686
      'ISO-2022-JP-2',
687
      'CSISO2022JP2',
688
      'CN-GB',
689
      'EUC-CN',
690
      'EUCCN',
691
      'GB2312',
692
      'CSGB2312',
693
      'GBK',
694
      'CP936',
695
      'MS936',
696
      'WINDOWS-936',
697
      'GB18030',
698
      'ISO-2022-CN',
699
      'CSISO2022CN',
700
      'ISO-2022-CN-EXT',
701
      'HZ',
702
      'HZ-GB-2312',
703
      'EUC-TW',
704
      'EUCTW',
705
      'CSEUCTW',
706
      'BIG-5',
707
      'BIG-FIVE',
708
      'BIG5',
709
      'BIGFIVE',
710
      'CN-BIG5',
711
      'CSBIG5',
712
      'CP950',
713
      'BIG5-HKSCS:1999',
714
      'BIG5-HKSCS:2001',
715
      'BIG5-HKSCS',
716
      'BIG5-HKSCS:2004',
717
      'BIG5HKSCS',
718
      'EUC-KR',
719
      'EUCKR',
720
      'CSEUCKR',
721
      'CP949',
722
      'UHC',
723
      'CP1361',
724
      'JOHAB',
725
      'ISO-2022-KR',
726
      'CSISO2022KR',
727
      'CP856',
728
      'CP922',
729
      'CP943',
730
      'CP1046',
731
      'CP1124',
732
      'CP1129',
733
      'CP1161',
734
      'IBM-1161',
735
      'IBM1161',
736
      'CSIBM1161',
737
      'CP1162',
738
      'IBM-1162',
739
      'IBM1162',
740
      'CSIBM1162',
741
      'CP1163',
742
      'IBM-1163',
743
      'IBM1163',
744
      'CSIBM1163',
745
      'DEC-KANJI',
746
      'DEC-HANYU',
747
      '437',
748
      'CP437',
749
      'IBM437',
750
      'CSPC8CODEPAGE437',
751
      'CP737',
752
      'CP775',
753
      'IBM775',
754
      'CSPC775BALTIC',
755
      '852',
756
      'CP852',
757
      'IBM852',
758
      'CSPCP852',
759
      'CP853',
760
      '855',
761
      'CP855',
762
      'IBM855',
763
      'CSIBM855',
764
      '857',
765
      'CP857',
766
      'IBM857',
767
      'CSIBM857',
768
      'CP858',
769
      '860',
770
      'CP860',
771
      'IBM860',
772
      'CSIBM860',
773
      '861',
774
      'CP-IS',
775
      'CP861',
776
      'IBM861',
777
      'CSIBM861',
778
      '863',
779
      'CP863',
780
      'IBM863',
781
      'CSIBM863',
782
      'CP864',
783
      'IBM864',
784
      'CSIBM864',
785
      '865',
786
      'CP865',
787
      'IBM865',
788
      'CSIBM865',
789
      '869',
790
      'CP-GR',
791
      'CP869',
792
      'IBM869',
793
      'CSIBM869',
794
      'CP1125',
795
      'EUC-JISX0213',
796
      'SHIFT_JISX0213',
797
      'ISO-2022-JP-3',
798
      'BIG5-2003',
799
      'ISO-IR-230',
800
      'TDS565',
801
      'ATARI',
802
      'ATARIST',
803
      'RISCOS-LATIN1',
804
  );
805
806
  /**
807
   * @var array
808
   */
809
  private static $SUPPORT = array();
810
811
  /**
   * Creates an instance and makes sure the environment detection has been triggered.
   *
   * INFO: the work is delegated to UTF8::checkForSupport(), which skips the
   *       detection when it was already done before.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
818
819
  /**
   * Return the character at the specified position: $str[1] like functionality.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, an empty string is returned for
   *                empty input or for a negative position.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;
    $pos = (int)$pos;

    // nothing can be accessed in an empty string and negative positions are not supported
    if (!isset($str[0]) || $pos < 0) {
      return '';
    }

    // the lookup itself is delegated to the multi-byte aware UTF8::substr()
    return (string)self::substr($str, $pos, 1);
  }
843
844
  /**
   * Prepends UTF-8 BOM character to the string and returns the whole string.
   *
   * INFO: If UTF8::string_has_bom() already reports a BOM, the input string is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // only prepend the BOM when the check explicitly reports that there is none
    if (self::string_has_bom($str) === false) {
      return self::bom() . $str;
    }

    return $str;
  }
861
862
  /**
   * Convert a binary number (a sequence of "1" and "0") into a string.
   *
   * The input is read as one big binary number and turned into the shortest
   * sequence of bytes which represents this number, e.g. "110001" => "1".
   *
   * INFO: the conversion is done byte by byte, so the result neither depends on
   *       the number of hex-digits of the value (e.g. "1010" => "\n") nor on the
   *       float precision of "base_convert()".
   *
   * @param mixed $bin <p>String or integer which only contains 1|0, other characters are ignored.</p>
   *
   * @return string <p>The decoded bytes, an empty string is returned for empty or non-scalar input.</p>
   */
  public static function binary_to_str($bin)
  {
    // arrays, objects and null can't be interpreted as a sequence of bits
    if (!is_scalar($bin)) {
      return '';
    }

    $bin = (string)$bin;
    if (!isset($bin[0])) {
      return '';
    }

    // everything which is not a binary digit is ignored, like "base_convert()" does it
    $bits = ltrim((string)preg_replace('/[^01]/', '', $bin), '0');
    if (!isset($bits[0])) {
      // the value is zero, which is represented by a single NUL byte
      $bits = '0';
    }

    // left-pad to whole bytes, so that the leading zero bits of the first byte are not lost
    $missing_bits = (8 - (strlen($bits) % 8)) % 8;
    $bits = str_repeat('0', $missing_bits) . $bits;

    $str = '';
    foreach (str_split($bits, 8) as $byte_bits) {
      $str .= chr((int)bindec($byte_bits));
    }

    return $str;
  }
877
878
  /**
   * Returns the UTF-8 Byte Order Mark Character.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string <p>UTF-8 Byte Order Mark (the three bytes 0xEF 0xBB 0xBF).</p>
   */
  public static function bom()
  {
    return "\xef\xbb\xbf";
  }
889
890
  /**
   * Applies callback to all characters of a string.
   *
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function callback($callback, $str)
  {
    // pure alias, both arguments are passed through unchanged
    return self::chr_map($callback, $str);
  }
904
905
  /**
   * This method will auto-detect your server environment for UTF-8 support.
   *
   * The result is stored in UTF8::$SUPPORT with the following keys:
   * "mbstring", "mbstring_func_overload", "iconv", "intl",
   * "intl__transliterator_list_ids", "intlChar" and "pcre_utf8".
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       The detection runs only once, later calls return immediately.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // the constant is checked first, because it doesn't exist in every environment
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the list of transliterator ids stays empty if it can't be queried
    self::$SUPPORT['intl__transliterator_list_ids'] = array();
    if (
        self::$SUPPORT['intl'] === true
        &&
        function_exists('transliterator_list_ids') === true
    ) {
      self::$SUPPORT['intl__transliterator_list_ids'] = transliterator_list_ids();
    }

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
950
951
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * INFO: for the default encoding "UTF-8" the call is handed over to "\IntlChar::chr()"
   *       if "\IntlChar" is available, otherwise the character is built manually and only
   *       real integers within the Unicode range (0 - 0x10FFFF) are accepted.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } elseif (self::$SUPPORT['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // check type of code_point, this is only reached if "\IntlChar" wasn't used above
    $i = (int)$code_point;
    if ($i !== $code_point) {
      return null;
    }

    // code points outside of the Unicode range can't be encoded as valid UTF-8
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    // use static cache, the separator keeps the keys unambiguous for
    // numeric encoding names like "850"
    static $CHAR_CACHE = array();
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1011
1012
  /**
   * Casts the given value to an integer and returns the single byte which
   * the native "chr()" generates for it.
   *
   * @param int $int <p>The byte value.</p>
   *
   * @return string <p>A single byte.</p>
   */
  private static function chr_and_parse_int($int)
  {
    return chr((int)$int);
  }
1021
1022
  /**
   * Applies callback to all characters of a string.
   *
   * The string is split via UTF8::split() and every resulting chunk is
   * passed to the callback.
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1036
1037
  /**
1038
   * Generates an array of byte length of each character of a Unicode string.
1039
   *
1040
   * 1 byte => U+0000  - U+007F
1041
   * 2 byte => U+0080  - U+07FF
1042
   * 3 byte => U+0800  - U+FFFF
1043
   * 4 byte => U+10000 - U+10FFFF
1044
   *
1045
   * @param string $str <p>The original Unicode string.</p>
1046
   *
1047
   * @return array <p>An array of byte lengths of each character.</p>
1048
   */
1049 4
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // An empty string has no characters to measure.
    if ($str === '') {
      return array();
    }

    // The class is referenced by name (not self::) inside the closure.
    $byteLength = function ($character) {
      return UTF8::strlen($character, '8BIT');
    };

    return array_map($byteLength, self::split($str));
  }
1064
1065
  /**
1066
   * Get a decimal code representation of a specific character.
1067
   *
1068
   * @param string $char <p>The input character.</p>
1069
   *
1070
   * @return int
1071
   */
1072 2
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // Reading a missing string offset raises "Uninitialized string offset" and
    // yields ''; pass '' explicitly so empty input no longer triggers that error.
    $code = self::ord(isset($char[0]) ? $char[0] : '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    // The lead byte announces the sequence length; strip its marker bits.
    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx: shift in the low bits of each following byte.
      // A truncated sequence contributes '' here instead of raising an offset error.
      $next = isset($char[$i]) ? $char[$i] : '';
      $code = ($code << 6) + (self::ord($next) & ~0x80);
    }

    return $code;
  }
1104
1105
  /**
1106
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
1107
   *
1108
   * @param string $char <p>The input character</p>
1109
   * @param string $pfix [optional]
1110
   *
1111
   * @return string <p>The code point encoded as U+xxxx</p>
1112
   */
1113 1
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // The literal text "&#0;" is looked up as an empty string.
    $lookup = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($lookup), $pfix);
  }
1127
1128
  /**
1129
   * alias for "UTF8::chr_to_decimal()"
1130
   *
1131
   * @see UTF8::chr_to_decimal()
1132
   *
1133
   * @param string $chr
1134
   *
1135
   * @return int
1136
   */
1137 1
  public static function chr_to_int($chr)
  {
    // Pure alias: delegate to chr_to_decimal() and pass its result through.
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1141
1142
  /**
1143
   * Splits a string into smaller chunks and multiple lines, using the specified line ending character.
1144
   *
1145
   * @param string $body     <p>The original string to be split.</p>
1146
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
1147
   * @param string $end      [optional] <p>The character(s) to be inserted at the end of each chunk.</p>
1148
   *
1149
   * @return string <p>The chunked string</p>
1150
   */
1151 1
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    // $end is placed between the chunks; nothing is appended after the last one.
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1155
1156
  /**
1157
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
1158
   *
1159
   * @param string $str                     <p>The string to be sanitized.</p>
1160
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
1161
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
1162
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
1163
   *                                        => "..."</p>
1164
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
1165
   *                                        $normalize_whitespace</p>
1166
   *
1167
   * @return string <p>Clean UTF-8 encoded string.</p>
1168
   */
1169 56
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    $validUtf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // Only capture group 1 (well-formed sequences) is written back; a stray byte
    // matched by group 2 or 3 is replaced by an empty group 1, i.e. dropped.
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark(
            preg_replace($validUtf8Pattern, '$1', $str),
            ''
        )
    );

    // Optional steps, applied in this fixed order.
    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1204
1205
  /**
1206
   * Clean up a string: keep only printable UTF-8 chars in the result + fix the UTF-8 encoding.
1207
   *
1208
   * @param string $str <p>The input string.</p>
1209
   *
1210
   * @return string
1211
   */
1212 22 View Code Duplication
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Step 1: repair ISO <-> UTF-8 errors.
    $repaired = self::fix_simple_utf8($str);

    // Step 2: run clean() with
    //   remove_bom = true, normalize_whitespace = true,
    //   normalize_msword = false, keep_non_breaking_space = true
    return (string)self::clean($repaired, true, true, false, true);
  }
1232
1233
  /**
1234
   * Accepts a string or an array of strings and returns an array of Unicode code points.
1235
   *
1236
   * INFO: opposite to UTF8::string()
1237
   *
1238
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
1239
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
1240
   *                                    default, code points will be returned as integers.</p>
1241
   *
1242
   * @return array <p>The array of code points.</p>
1243
   */
1244 7
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    // A plain string is first broken up via split(); anything else is mapped as given.
    $chars = (is_string($arg) === true) ? self::split($arg) : $arg;

    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // int_to_hex() receives only the code point here, so it uses its default prefix.
    return array_map(array($class, 'int_to_hex'), $points);
  }
1270
1271
  /**
1272
   * Returns count of characters used in a string.
1273
   *
1274
   * @param string $str       <p>The input string.</p>
1275
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
1276
   *
1277
   * @return array <p>An associative array of Character as keys and
1278
   *               their count as values.</p>
1279
   */
1280 7
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // Split with a chunk length of 1, then tally identical pieces.
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1284
1285
  /**
1286
   * Converts an int-value into a UTF-8 character.
1287
   *
1288
   * @param mixed $int
1289
   *
1290
   * @return string
1291
   */
1292 5
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only referenced when Bootup reports PHP 5.4 or newer.
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    // Build the numeric entity and let the entity decoder produce the character.
    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1302
1303
  /**
1304
   * Encode a string with a new charset-encoding.
1305
   *
1306
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
1307
   *        so you can also call this function on a UTF-8 string without messing it up.
1308
   *
1309
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
1310
   * @param string $str      <p>The input string</p>
1311
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
1312
   *                         /> otherwise we auto-detect the current string-encoding</p>
1313
   *
1314
   * @return string
1315
   */
1316 13
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // Nothing to do for an empty string or an empty target encoding.
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // Leave the string untouched when detection failed, or when it already has
    // the requested encoding and the caller did not force a conversion.
    if ($detected === false || ($force !== true && $detected === $encoding)) {
      return $str;
    }

    $forced = ($force === true);

    // Target UTF-8: handled by to_utf8() when forced or when the source is one of these encodings.
    if (
        $encoding === 'UTF-8'
        &&
        ($forced || in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true))
    ) {
      return self::to_utf8($str);
    }

    // Target ISO-8859-1: handled by to_iso8859() when forced or when the source is one of these encodings.
    if (
        $encoding === 'ISO-8859-1'
        &&
        ($forced || in_array($detected, array('ISO-8859-1', 'UTF-8'), true))
    ) {
      return self::to_iso8859($str);
    }

    // Warn when the generic conversion is needed while the mbstring support flag is off.
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // A falsy conversion result falls back to the (unconverted) input.
    return $converted ? $converted : $str;
  }
1393
1394
  /**
1395
   * Reads entire file into a string.
1396
   *
1397
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1398
   *
1399
   * @link http://php.net/manual/en/function.file-get-contents.php
1400
   *
1401
   * @param string        $filename      <p>
1402
   *                                     Name of the file to read.
1403
   *                                     </p>
1404
   * @param int|false     $flags         [optional] <p>
1405
   *                                     Prior to PHP 6, this parameter is called
1406
   *                                     use_include_path and is a bool.
1407
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1408
   *                                     to trigger include path
1409
   *                                     search.
1410
   *                                     </p>
1411
   *                                     <p>
1412
   *                                     The value of flags can be any combination of
1413
   *                                     the following flags (with some restrictions), joined with the
1414
   *                                     binary OR (|)
1415
   *                                     operator.
1416
   *                                     </p>
1417
   *                                     <p>
1418
   *                                     <table>
1419
   *                                     Available flags
1420
   *                                     <tr valign="top">
1421
   *                                     <td>Flag</td>
1422
   *                                     <td>Description</td>
1423
   *                                     </tr>
1424
   *                                     <tr valign="top">
1425
   *                                     <td>
1426
   *                                     FILE_USE_INCLUDE_PATH
1427
   *                                     </td>
1428
   *                                     <td>
1429
   *                                     Search for filename in the include directory.
1430
   *                                     See include_path for more
1431
   *                                     information.
1432
   *                                     </td>
1433
   *                                     </tr>
1434
   *                                     <tr valign="top">
1435
   *                                     <td>
1436
   *                                     FILE_TEXT
1437
   *                                     </td>
1438
   *                                     <td>
1439
   *                                     As of PHP 6, the default encoding of the read
1440
   *                                     data is UTF-8. You can specify a different encoding by creating a
1441
   *                                     custom context or by changing the default using
1442
   *                                     stream_default_encoding. This flag cannot be
1443
   *                                     used with FILE_BINARY.
1444
   *                                     </td>
1445
   *                                     </tr>
1446
   *                                     <tr valign="top">
1447
   *                                     <td>
1448
   *                                     FILE_BINARY
1449
   *                                     </td>
1450
   *                                     <td>
1451
   *                                     With this flag, the file is read in binary mode. This is the default
1452
   *                                     setting and cannot be used with FILE_TEXT.
1453
   *                                     </td>
1454
   *                                     </tr>
1455
   *                                     </table>
1456
   *                                     </p>
1457
   * @param resource|null $context       [optional] <p>
1458
   *                                     A valid context resource created with
1459
   *                                     stream_context_create. If you don't need to use a
1460
   *                                     custom context, you can skip this parameter by &null;.
1461
   *                                     </p>
1462
   * @param int|null $offset             [optional] <p>
1463
   *                                     The offset where the reading starts.
1464
   *                                     </p>
1465
   * @param int|null $maxLength          [optional] <p>
1466
   *                                     Maximum length of data read. The default is to read until end
1467
   *                                     of file is reached.
1468
   *                                     </p>
1469
   * @param int      $timeout            <p>The time in seconds for the timeout.</p>
1470
   *
1471
   * @param boolean  $convertToUtf8      <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1472
   *                                     or pdf, because they used non default utf-8 chars</p>
1473
   *
1474
   * @return string|false <p>The function returns the read data or false on failure.</p>
1475
   */
1476 4
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so a
    // path containing such characters is altered here - confirm this is intended.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);
    $timeout = (int)$timeout;

    // Without a caller-supplied context, build one that only carries the HTTP timeout.
    if ($context === null && $timeout !== 0) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    // Normalise "not given" values before handing them to the native function.
    $flags = $flags ? $flags : false;
    $offset = ($offset === null) ? 0 : $offset;

    // The length argument is only forwarded when it is a real integer.
    if (is_int($maxLength)) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxLength);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // Encode to UTF-8 with $force = false, then pass the result through cleanup().
    return self::cleanup(self::encode('UTF-8', $data, false));
  }
1519
1520
  /**
1521
   * Checks if a file starts with BOM (Byte Order Mark) character.
1522
   *
1523
   * @param string $file_path <p>Path to a valid file.</p>
1524
   *
1525
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
1526
   */
1527 1
  public static function file_has_bom($file_path)
  {
    // Read the raw file content with the native function and test that string.
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1531
1532
  /**
1533
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1534
   *
1535
   * @param mixed  $var
1536
   * @param int    $normalization_form
1537
   * @param string $leading_combining
1538
   *
1539
   * @return mixed
1540
   */
1541 9
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    // Arrays: filter every element recursively.
    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Objects: filter every iterable property recursively.
    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    // Every other non-string type is returned untouched.
    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // Pure ASCII needs no normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Placeholder: any non-empty value keeps the isset() check below satisfied.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // Use the normalized text when there is one, otherwise fall back to encode().
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1595
1596
  /**
1597
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1598
   *
1599
   * Gets a specific external variable by name and optionally filters it
1600
   *
1601
   * @link  http://php.net/manual/en/function.filter-input.php
1602
   *
1603
   * @param int    $type          <p>
1604
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1605
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1606
   *                              <b>INPUT_ENV</b>.
1607
   *                              </p>
1608
   * @param string $variable_name <p>
1609
   *                              Name of a variable to get.
1610
   *                              </p>
1611
   * @param int    $filter        [optional] <p>
1612
   *                              The ID of the filter to apply. The
1613
   *                              manual page lists the available filters.
1614
   *                              </p>
1615
   * @param mixed  $options       [optional] <p>
1616
   *                              Associative array of options or bitwise disjunction of flags. If filter
1617
   *                              accepts options, flags can be provided in "flags" field of array.
1618
   *                              </p>
1619
   *
1620
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1621
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1622
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1623
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1624
   * @since 5.2.0
1625
   */
1626 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually passed a fourth argument.
    $var = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1636
1637
  /**
1638
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1639
   *
1640
   * Gets external variables and optionally filters them
1641
   *
1642
   * @link  http://php.net/manual/en/function.filter-input-array.php
1643
   *
1644
   * @param int   $type       <p>
1645
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1646
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1647
   *                          <b>INPUT_ENV</b>.
1648
   *                          </p>
1649
   * @param mixed $definition [optional] <p>
1650
   *                          An array defining the arguments. A valid key is a string
1651
   *                          containing a variable name and a valid value is either a filter type, or an array
1652
   *                          optionally specifying the filter, flags and options. If the value is an
1653
   *                          array, valid keys are filter which specifies the
1654
   *                          filter type,
1655
   *                          flags which specifies any flags that apply to the
1656
   *                          filter, and options which specifies any options that
1657
   *                          apply to the filter. See the example below for a better understanding.
1658
   *                          </p>
1659
   *                          <p>
1660
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1661
   *                          input array are filtered by this filter.
1662
   *                          </p>
1663
   * @param bool  $add_empty  [optional] <p>
1664
   *                          Add missing keys as <b>NULL</b> to the return value.
1665
   *                          </p>
1666
   *
1667
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1668
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1669
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1670
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1671
   * fails.
1672
   * @since 5.2.0
1673
   */
1674 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With only $type given, call the native function with that single argument.
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1684
1685
  /**
1686
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1687
   *
1688
   * Filters a variable with a specified filter
1689
   *
1690
   * @link  http://php.net/manual/en/function.filter-var.php
1691
   *
1692
   * @param mixed $variable <p>
1693
   *                        Value to filter.
1694
   *                        </p>
1695
   * @param int   $filter   [optional] <p>
1696
   *                        The ID of the filter to apply. The
1697
   *                        manual page lists the available filters.
1698
   *                        </p>
1699
   * @param mixed $options  [optional] <p>
1700
   *                        Associative array of options or bitwise disjunction of flags. If filter
1701
   *                        accepts options, flags can be provided in "flags" field of array. For
1702
   *                        the "callback" filter, callable type should be passed. The
1703
   *                        callback must accept one argument, the value to be filtered, and return
1704
   *                        the value after filtering/sanitizing it.
1705
   *                        </p>
1706
   *                        <p>
1707
   *                        <code>
1708
   *                        // for filters that accept options, use this format
1709
   *                        $options = array(
1710
   *                        'options' => array(
1711
   *                        'default' => 3, // value to return if the filter fails
1712
   *                        // other options here
1713
   *                        'min_range' => 0
1714
   *                        ),
1715
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1716
   *                        );
1717
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1718
   *                        // for filter that only accept flags, you can pass them directly
1719
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1720
   *                        // for filter that only accept flags, you can also pass as an array
1721
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1722
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1723
   *                        // callback validate filter
1724
   *                        function foo($value)
1725
   *                        {
1726
   *                        // Expected format: Surname, GivenNames
1727
   *                        if (strpos($value, ", ") === false) return false;
1728
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1729
   *                        $empty = (empty($surname) || empty($givennames));
1730
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1731
   *                        if ($empty || $notstrings) {
1732
   *                        return false;
1733
   *                        } else {
1734
   *                        return $value;
1735
   *                        }
1736
   *                        }
1737
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1738
   *                        </code>
1739
   *                        </p>
1740
   *
1741
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1742
   * @since 5.2.0
1743
   */
1744 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually passed a third argument.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1754
1755
  /**
   * "filter_var_array()"-wrapper whose result is passed through UTF8::filter()
   * (normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed).
   *
   * Gets multiple variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-var-array.php
   *
   * @param array $data       <p>An array with string keys containing the data to filter.</p>
   * @param mixed $definition [optional] <p>
   *                          Either an array defining the arguments, or an integer holding a filter constant.
   *                          In the array form a valid key is a string containing a variable name and a valid
   *                          value is either a filter type or an array with the optional keys "filter"
   *                          (the filter type), "flags" (flags that apply to the filter) and "options"
   *                          (options that apply to the filter).
   *                          In the integer form all values of the input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>Add missing keys as <b>NULL</b> to the return value.</p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   *               on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   *               the variable is not set.
   * @since 5.2.0
   */
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // Forward $definition / $add_empty only when the caller supplied at least
    // two arguments; with $data alone the native defaults are used.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    return self::filter($filtered);
  }
1799
1800
  /**
   * Check that a string has no more unicode characters than the given limit.
   *
   * @param string $str      The original string to be checked.
   * @param int    $box_size The maximum number of chars the string may have.
   *
   * @return bool true if the length reported by UTF8::strlen() is less than or equal to $box_size,
   *              false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1812
1813
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Use this function if you received an UTF-8 string that was converted from Windows-1252 as if it
   * were ISO-8859-1 (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with every key of the internal "broken => fixed" map replaced by its
   *                value; an empty string for empty input.</p>
   */
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists, built once per request from self::$BROKEN_UTF8_FIX.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1845
1846
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively), keeping their keys.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Decode and re-encode until a pass no longer changes the value.
    // An empty string never enters the loop and is returned untouched.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1878
1879
  /**
   * Get the text direction of a specific character.
   *
   * When "IntlChar" support is available its direction class is consulted first; if that class is
   * neither in the LTR nor in the RTL list below, or "IntlChar" is not available, the code point is
   * looked up in a built-in table of right-to-left code points.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $directionClass = \IntlChar::charDirection($char);

      // direction values from the "IntlChar"-Class
      if (in_array($directionClass, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($directionClass, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $codePoint = static::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is never treated as right-to-left.
    if (!(0x5be <= $codePoint && 0x10b7f >= $codePoint)) {
      return 'LTR';
    }

    // Single right-to-left code points (compared strictly).
    $rtlSingles = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    if (in_array($codePoint, $rtlSingles, true)) {
      return 'RTL';
    }

    // Inclusive right-to-left ranges: array(first, last).
    $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2000
2001
  /**
   * Get data from "/data/*.php" (relative to the directory of this class).
   *
   * @param string $file <p>Name of the data file, without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>The value returned by the included file.
   *                               Will return false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2018
2019
  /**
   * Check for php-support.
   *
   * The support check is run on first use, before anything is read from the support-"array".
   *
   * @param string|null $key
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br />
   *               return bool-value, if $key is used and available<br />
   *               otherwise return null</p>
   */
  public static function getSupportInfo($key = null)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2044
2045
  /**
   * Deprecated alias for "UTF8::string_has_bom()".
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool <p>The result of UTF8::string_has_bom() for the given string.</p>
   *
   * @deprecated
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2060
2061
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with hexdec() and the resulting number is handed to UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr($hexdec)
  {
    $codePoint = hexdec($hexdec);

    return self::decimal_to_chr($codePoint);
  }
2072
2073
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms (case-insensitive): "U+xxxx", "\uxxxx" or the bare digits, with 4 to 6
   * hexadecimal digits.
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure (empty input or anything that is not
   *                   an optional prefix followed by 4-6 hexadecimal digits).</p>
   */
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    if (!isset($hexDec[0])) {
      return false;
    }

    // Only real hexadecimal digits are accepted. With the former [a-z0-9] class an input
    // such as "zzzz" matched and intval(..., 16), which stops at the first non-hex character,
    // silently returned 0 (or a truncated value) instead of signalling a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2096
2097
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2112
2113
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities; an empty string for empty input.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // First code point that gets encoded: 0x80 keeps the ASCII range untouched.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap consists of groups of exactly four integers (start, end, offset, mask).
      // The former fifth element was ignored by older PHP versions, but PHP 8 rejects a map
      // whose size is not a multiple of four with a ValueError.
      // NOTE(review): the upper bound 0xfffff leaves U+100000..U+10FFFF unencoded - confirm
      // whether that is intended before widening the range.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without "mbstring": encode character by character.
    // The class is referenced by name inside the closure (presumably because "self" is not
    // available in closures on old PHP versions).
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2162
2163
  /**
   * UTF-8 version of html_entity_decode()
   *
   * The reason we are not using html_entity_decode() by itself is because
   * while it is not technically correct to leave out the semicolon
   * at the end of an entity most browsers will still interpret the entity
   * correctly. html_entity_decode() does not convert entities without
   * semicolons, so we are left with our own little solution here. Bummer.
   *
   * Convert all HTML entities to their applicable characters. Decoding is repeated until the
   * string no longer changes, so multiply encoded entities are decoded as well.
   *
   * INFO: opposite to UTF8::html_encode()
   *
   * @link http://php.net/manual/en/function.html-entity-decode.php
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $flags    [optional] <p>
   *                         A bitmask of one or more of the following flags, which specify how to handle
   *                         quotes and which document type to use. If null (the default), ENT_QUOTES is
   *                         used, combined with ENT_HTML5 when Bootup::is_php('5.4') reports true.
   *                         <ul>
   *                         <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes alone.</li>
   *                         <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                         <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes unconverted.</li>
   *                         <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                         <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                         <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                         <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                         </ul>
   *                         </p>
   * @param string $encoding [optional] <p>Encoding to use.</p>
   *
   * @return string <p>The decoded string.</p>
   */
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Strings shorter than four bytes are returned as they are (examples: &; || &x;).
    if (!isset($str[3])) {
      return $str;
    }

    // Without any "&" there is nothing to decode ...
    if (strpos($str, '&') === false) {
      return $str;
    }

    // ... and the same holds when there is neither a numeric entity start nor a semicolon.
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      $flags = ENT_QUOTES;

      if (Bootup::is_php('5.4') === true) {
        $flags |= ENT_HTML5;
      }
    }

    // Decodes one decimal entity; entities that decode to a quote character are kept as entity
    // (presumably so that quote handling stays with html_entity_decode() and $flags below).
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // Append the missing semicolon to numeric (decimal or hexadecimal) entities,
      // then let the native function decode numeric & UTF16 two byte entities.
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2297
2298
  /**
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
   *
   * After the native htmlentities() call, and only when the (normalized) encoding is UTF-8, every
   * remaining character that occupies three or more bytes is replaced by the result of
   * UTF8::html_encode() for that character.
   *
   * @link http://php.net/manual/en/function.htmlentities.php
   *
   * @param string $str           <p>The input string.</p>
   * @param int    $flags         [optional] <p>
   *                              A bitmask of one or more of the following flags, which specify how to handle
   *                              quotes, invalid code unit sequences and the used document type. The default
   *                              is ENT_COMPAT.
   *                              <ul>
   *                              <li><b>ENT_COMPAT</b>: Will convert double-quotes and leave single-quotes
   *                              alone.</li>
   *                              <li><b>ENT_QUOTES</b>: Will convert both double and single quotes.</li>
   *                              <li><b>ENT_NOQUOTES</b>: Will leave both double and single quotes
   *                              unconverted.</li>
   *                              <li><b>ENT_IGNORE</b>: Silently discard invalid code unit sequences instead
   *                              of returning an empty string. Using this flag is discouraged as it may have
   *                              security implications.</li>
   *                              <li><b>ENT_SUBSTITUTE</b>: Replace invalid code unit sequences with a
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise)
   *                              instead of returning an empty string.</li>
   *                              <li><b>ENT_DISALLOWED</b>: Replace invalid code points for the given
   *                              document type with a Unicode Replacement Character U+FFFD (UTF-8) or
   *                              &#38;#38;#FFFD; (otherwise) instead of leaving them as is. This may be
   *                              useful, for instance, to ensure the well-formedness of XML documents with
   *                              embedded external content.</li>
   *                              <li><b>ENT_HTML401</b>: Handle code as HTML 4.01.</li>
   *                              <li><b>ENT_XML1</b>: Handle code as XML 1.</li>
   *                              <li><b>ENT_XHTML</b>: Handle code as XHTML.</li>
   *                              <li><b>ENT_HTML5</b>: Handle code as HTML 5.</li>
   *                              </ul>
   *                              </p>
   * @param string $encoding      [optional] <p>
   *                              Like <b>htmlspecialchars</b>, <b>htmlentities</b> takes an optional third
   *                              argument <i>encoding</i> which defines encoding used in conversion.
   *                              Although this argument is technically optional, you are highly
   *                              encouraged to specify the correct value for your code.
   *                              </p>
   * @param bool   $double_encode [optional] <p>
   *                              When <i>double_encode</i> is turned off PHP will not
   *                              encode existing html entities. The default is to convert everything.
   *                              </p>
   *
   * @return string <p>The encoded string.
   *                If the input <i>string</i> contains an invalid code unit sequence within the given
   *                <i>encoding</i> an empty string will be returned, unless either the <b>ENT_IGNORE</b>
   *                or <b>ENT_SUBSTITUTE</b> flags are set.</p>
   */
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // The extra pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    $search = array();
    $replacements = array();

    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $position);

      // Every distinct character is encoded only once.
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2429
2430
  /**
2431
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2432
   *
2433
   * INFO: Take a look at "UTF8::htmlentities()"
2434
   *
2435
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2436
   *
2437
   * @param string $str           <p>
2438
   *                              The string being converted.
2439
   *                              </p>
2440
   * @param int    $flags         [optional] <p>
2441
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2442
   *                              invalid code unit sequences and the used document type. The default is
2443
   *                              ENT_COMPAT | ENT_HTML401.
2444
   *                              <table>
2445
   *                              Available <i>flags</i> constants
2446
   *                              <tr valign="top">
2447
   *                              <td>Constant Name</td>
2448
   *                              <td>Description</td>
2449
   *                              </tr>
2450
   *                              <tr valign="top">
2451
   *                              <td><b>ENT_COMPAT</b></td>
2452
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2453
   *                              </tr>
2454
   *                              <tr valign="top">
2455
   *                              <td><b>ENT_QUOTES</b></td>
2456
   *                              <td>Will convert both double and single quotes.</td>
2457
   *                              </tr>
2458
   *                              <tr valign="top">
2459
   *                              <td><b>ENT_NOQUOTES</b></td>
2460
   *                              <td>Will leave both double and single quotes unconverted.</td>
2461
   *                              </tr>
2462
   *                              <tr valign="top">
2463
   *                              <td><b>ENT_IGNORE</b></td>
2464
   *                              <td>
2465
   *                              Silently discard invalid code unit sequences instead of returning
2466
   *                              an empty string. Using this flag is discouraged as it
2467
   *                              may have security implications.
2468
   *                              </td>
2469
   *                              </tr>
2470
   *                              <tr valign="top">
2471
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2472
   *                              <td>
2473
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2474
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2475
   *                              </td>
2476
   *                              </tr>
2477
   *                              <tr valign="top">
2478
   *                              <td><b>ENT_DISALLOWED</b></td>
2479
   *                              <td>
2480
   *                              Replace invalid code points for the given document type with a
2481
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2482
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2483
   *                              instance, to ensure the well-formedness of XML documents with
2484
   *                              embedded external content.
2485
   *                              </td>
2486
   *                              </tr>
2487
   *                              <tr valign="top">
2488
   *                              <td><b>ENT_HTML401</b></td>
2489
   *                              <td>
2490
   *                              Handle code as HTML 4.01.
2491
   *                              </td>
2492
   *                              </tr>
2493
   *                              <tr valign="top">
2494
   *                              <td><b>ENT_XML1</b></td>
2495
   *                              <td>
2496
   *                              Handle code as XML 1.
2497
   *                              </td>
2498
   *                              </tr>
2499
   *                              <tr valign="top">
2500
   *                              <td><b>ENT_XHTML</b></td>
2501
   *                              <td>
2502
   *                              Handle code as XHTML.
2503
   *                              </td>
2504
   *                              </tr>
2505
   *                              <tr valign="top">
2506
   *                              <td><b>ENT_HTML5</b></td>
2507
   *                              <td>
2508
   *                              Handle code as HTML 5.
2509
   *                              </td>
2510
   *                              </tr>
2511
   *                              </table>
2512
   *                              </p>
2513
   * @param string $encoding      [optional] <p>
2514
   *                              Defines encoding used in conversion.
2515
   *                              </p>
2516
   *                              <p>
2517
   *                              For the purposes of this function, the encodings
2518
   *                              ISO-8859-1, ISO-8859-15,
2519
   *                              UTF-8, cp866,
2520
   *                              cp1251, cp1252, and
2521
   *                              KOI8-R are effectively equivalent, provided the
2522
   *                              <i>string</i> itself is valid for the encoding, as
2523
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2524
   *                              the same positions in all of these encodings.
2525
   *                              </p>
2526
   * @param bool   $double_encode [optional] <p>
2527
   *                              When <i>double_encode</i> is turned off PHP will not
2528
   *                              encode existing html entities, the default is to convert everything.
2529
   *                              </p>
2530
   *
2531
   * @return string The converted string.
2532
   * </p>
2533
   * <p>
2534
   * If the input <i>string</i> contains an invalid code unit
2535
   * sequence within the given <i>encoding</i> an empty string
2536
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2537
   * <b>ENT_SUBSTITUTE</b> flags are set.
2538
   */
2539 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The exact spelling "UTF-8" is passed through untouched; every other value is
    // first handed to normalize_encoding() with "UTF-8" as its second argument.
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    // Delegate the actual escaping to the native function.
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2547
2548
  /**
2549
   * Checks whether iconv is available on the server.
2550
   *
2551
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2552
   */
2553 1
  public static function iconv_loaded()
  {
    // extension_loaded() already yields a boolean, no ternary needed.
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only configure iconv when the extension really exists: calling
    // iconv_set_encoding() without it is a fatal "undefined function" error,
    // which would turn this availability check itself into a crash.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2568
2569
  /**
2570
   * alias for "UTF8::decimal_to_chr()"
2571
   *
2572
   * @see UTF8::decimal_to_chr()
2573
   *
2574
   * @param mixed $int
2575
   *
2576
   * @return string
2577
   */
2578 2
  public static function int_to_chr($int)
  {
    // Pure alias: the argument is forwarded unchanged to decimal_to_chr().
    return self::decimal_to_chr($int);
  }
2582
2583
  /**
2584
   * Converts Integer to hexadecimal U+xxxx code point representation.
2585
   *
2586
   * INFO: opposite to UTF8::hex_to_int()
2587
   *
2588
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2589
   * @param string $pfix [optional]
2590
   *
2591
   * @return string <p>The code point, or empty string on failure.</p>
2592
   */
2593 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a genuine integer (numeric strings, floats, null, ...)
    // is rejected with an empty string.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits (0x41 => "0041"); longer
    // values are kept as they are.
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2605
2606
  /**
2607
   * Checks whether intl-char is available on the server.
2608
   *
2609
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2610
   */
2611 1
  public static function intlChar_loaded()
  {
    // The version gate is evaluated first; class_exists() is only consulted
    // when Bootup::is_php('7.0') reports true.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2619
2620
  /**
2621
   * Checks whether intl is available on the server.
2622
   *
2623
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2624
   */
2625 4
  public static function intl_loaded()
  {
    // extension_loaded() already returns a boolean.
    return extension_loaded('intl') === true;
  }
2629
2630
  /**
2631
   * alias for "UTF8::is_ascii()"
2632
   *
2633
   * @see UTF8::is_ascii()
2634
   *
2635
   * @param string $str
2636
   *
2637
   * @return boolean
2638
   *
2639
   * @deprecated Use UTF8::is_ascii() instead.
2640
   */
2641
  public static function isAscii($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_ascii().
    return self::is_ascii($str);
  }
2645
2646
  /**
2647
   * alias for "UTF8::is_base64()"
2648
   *
2649
   * @see UTF8::is_base64()
2650
   *
2651
   * @param string $str
2652
   *
2653
   * @return bool
2654
   *
2655
   * @deprecated Use UTF8::is_base64() instead.
2656
   */
2657
  public static function isBase64($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_base64().
    return self::is_base64($str);
  }
2661
2662
  /**
2663
   * alias for "UTF8::is_binary()"
2664
   *
2665
   * @see UTF8::is_binary()
2666
   *
2667
   * @param string $str
2668
   *
2669
   * @return bool
2670
   *
2671
   * @deprecated Use UTF8::is_binary() instead.
2672
   */
2673
  public static function isBinary($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_binary().
    return self::is_binary($str);
  }
2677
2678
  /**
2679
   * alias for "UTF8::is_bom()"
2680
   *
2681
   * @see UTF8::is_bom()
2682
   *
2683
   * @param string $utf8_chr
2684
   *
2685
   * @return boolean
2686
   *
2687
   * @deprecated Use UTF8::is_bom() instead.
2688
   */
2689
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_bom().
    return self::is_bom($utf8_chr);
  }
2693
2694
  /**
2695
   * alias for "UTF8::is_html()"
2696
   *
2697
   * @see UTF8::is_html()
2698
   *
2699
   * @param string $str
2700
   *
2701
   * @return boolean
2702
   *
2703
   * @deprecated Use UTF8::is_html() instead.
2704
   */
2705
  public static function isHtml($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_html().
    return self::is_html($str);
  }
2709
2710
  /**
2711
   * alias for "UTF8::is_json()"
2712
   *
2713
   * @see UTF8::is_json()
2714
   *
2715
   * @param string $str
2716
   *
2717
   * @return bool
2718
   *
2719
   * @deprecated Use UTF8::is_json() instead.
2720
   */
2721
  public static function isJson($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_json().
    return self::is_json($str);
  }
2725
2726
  /**
2727
   * alias for "UTF8::is_utf16()"
2728
   *
2729
   * @see UTF8::is_utf16()
2730
   *
2731
   * @param string $str
2732
   *
2733
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2734
   *
2735
   * @deprecated Use UTF8::is_utf16() instead.
2736
   */
2737
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_utf16().
    return self::is_utf16($str);
  }
2741
2742
  /**
2743
   * alias for "UTF8::is_utf32()"
2744
   *
2745
   * @see UTF8::is_utf32()
2746
   *
2747
   * @param string $str
2748
   *
2749
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2750
   *
2751
   * @deprecated Use UTF8::is_utf32() instead.
2752
   */
2753
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias: forwards the argument unchanged to is_utf32().
    return self::is_utf32($str);
  }
2757
2758
  /**
2759
   * alias for "UTF8::is_utf8()"
2760
   *
2761
   * @see UTF8::is_utf8()
2762
   *
2763
   * @param string $str
2764
   * @param bool   $strict
2765
   *
2766
   * @return bool
2767
   *
2768
   * @deprecated Use UTF8::is_utf8() instead.
2769
   */
2770
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias: forwards both arguments unchanged to is_utf8().
    return self::is_utf8($str, $strict);
  }
2774
2775
  /**
2776
   * Checks if a string is 7 bit ASCII.
2777
   *
2778
   * @param string $str <p>The string to check.</p>
2779
   *
2780
   * @return bool <p>
2781
   *              <strong>true</strong> if it is ASCII<br />
2782
   *              <strong>false</strong> otherwise
2783
   *              </p>
2784
   */
2785 42
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // An empty string has no byte outside the allowed set.
    if ($str === '') {
      return true;
    }

    // Allowed bytes: TAB, LF, CR, the printable range 0x20-0x7E, plus 0x10 and 0x13.
    // NOTE(review): \x10 / \x13 are DLE / DC3; possibly decimal 10 / 13 (LF / CR,
    // already listed as \x0A / \x0D) were meant -- confirm before changing.
    $hasForeignByte = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return !$hasForeignByte;
  }
2795
2796
  /**
2797
   * Returns true if the string is base64 encoded, false otherwise.
2798
   *
2799
   * @param string $str <p>The input string.</p>
2800
   *
2801
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2802
   */
2803 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never reported as base64.
    if ($str === '') {
      return false;
    }

    // Strict mode: base64_decode() returns false as soon as it meets a character
    // outside the base64 alphabet.
    $decoded = base64_decode($str, true);

    // Test for failure with "=== false" instead of truthiness: the decoded payload
    // "0" is falsy in PHP, so valid input such as "MA==" used to be rejected.
    if ($decoded === false) {
      return false;
    }

    // Re-encoding must reproduce the input exactly (canonical padding, no extras).
    return base64_encode($decoded) === $str;
  }
2818
2819
  /**
2820
   * Check if the input is binary (heuristic; this looks like a hack).
2821
   *
2822
   * @param mixed $input
2823
   *
2824
   * @return bool
2825
   */
2826 18
  public static function is_binary($input)
  {
    $input = (string)$input;

    // Empty input is never considered binary.
    if ($input === '') {
      return false;
    }

    // A string consisting solely of the characters "0" and "1" is treated as a
    // textual bit string.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30% NUL bytes"
    // test returned true only for inputs that this check accepts anyway, so it
    // was a redundant second scan and has been folded into this one.
    return strpos($input, "\x00") !== false;
  }
2849
2850
  /**
2851
   * Check if the file is binary.
2852
   *
2853
   * @param string $file
2854
   *
2855
   * @return boolean
2856
   */
2857
  public static function is_binary_file($file)
  {
    // Default when the file cannot be opened or read; this is passed to is_binary().
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // fopen() signals failure by returning false, not by throwing, so the handle
      // has to be checked before it is handed to fread()/fclose().
      if ($fp !== false) {
        // Only the first 512 bytes are sampled.
        $data = fread($fp, 512);
        fclose($fp);

        if ($data !== false) {
          $block = $data;
        }
      }
    } catch (\Exception $e) {
      // Reached only if one of the calls above throws an \Exception; the sample
      // then stays empty.
      $block = '';
    }

    return self::is_binary($block);
  }
2869
2870
  /**
2871
   * Checks if the given string is equal to any "Byte Order Mark".
2872
   *
2873
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2874
   *
2875
   * @param string $str <p>The input string.</p>
2876
   *
2877
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2878
   */
2879 1
  public static function is_bom($str)
  {
    // Only the keys of self::$BOM are compared; the values are not used here.
    // The strict flag keeps the comparison type-safe (===), as before.
    return in_array($str, array_keys(self::$BOM), true);
  }
2889
2890
  /**
2891
   * Check if the string contains any html-tags <lall>.
2892
   *
2893
   * @param string $str <p>The input string.</p>
2894
   *
2895
   * @return boolean
2896
   */
2897 1
  public static function is_html($str)
  {
    $str = (string)$str;

    // Nothing to find in an empty string.
    if ($str === '') {
      return false;
    }

    // Matches one opening, closing or self-closing tag, with optional attributes
    // whose values may be double-quoted, single-quoted or unquoted.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // A single hit is enough; no match and a PCRE error both yield false.
    return preg_match($tagPattern, $str) === 1;
  }
2916
2917
  /**
2918
   * Try to check if "$str" is a JSON string.
2919
   *
2920
   * @param string $str <p>The input string.</p>
2921
   *
2922
   * @return bool
2923
   */
2924 1
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays count; scalars and null are rejected.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // The decoder must not have reported an error for this call.
    return json_last_error() === JSON_ERROR_NONE;
  }
2948
2949
  /**
2950
   * Check if the string is UTF-16.
2951
   *
2952
   * @param string $str <p>The input string.</p>
2953
   *
2954
   * @return int|false <p>
2955
   *                   <strong>false</strong> if it's not UTF-16,<br />
2956
   *                   <strong>1</strong> for UTF-16LE,<br />
2957
   *                   <strong>2</strong> for UTF-16BE.
2958
   *                   </p>
2959
   */
2960 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders with the same procedure: little endian first, then
    // big endian.
    $hits = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $hits[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // Round trip UTF-8 -> UTF-16 -> UTF-8; the candidate is only scored when the
      // round trip reproduces the first conversion exactly.
      $backToUtf16 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // One point per key of count_chars($roundTrip) that in_array() finds
      // (strictly) in count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means no decision can be made.
    if ($hits['UTF-16LE'] === $hits['UTF-16BE']) {
      return false;
    }

    return ($hits['UTF-16LE'] > $hits['UTF-16BE']) ? 1 : 2;
  }
3008
3009
  /**
3010
   * Check if the string is UTF-32.
3011
   *
3012
   * @param string $str
3013
   *
3014
   * @return int|false <p>
3015
   *                   <strong>false</strong> if it's not UTF-32,<br />
3016
   *                   <strong>1</strong> for UTF-32LE,<br />
3017
   *                   <strong>2</strong> for UTF-32BE.
3018
   *                   </p>
3019
   */
3020 3 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders with the same procedure: little endian first, then
    // big endian.
    $hits = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $hits[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // Round trip UTF-8 -> UTF-32 -> UTF-8; the candidate is only scored when the
      // round trip reproduces the first conversion exactly.
      $backToUtf32 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // One point per key of count_chars($roundTrip) that in_array() finds
      // (strictly) in count_chars($str).
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $hits[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means no decision can be made.
    if ($hits['UTF-32LE'] === $hits['UTF-32BE']) {
      return false;
    }

    return ($hits['UTF-32LE'] > $hits['UTF-32BE']) ? 1 : 2;
  }
3068
3069
  /**
3070
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3071
   *
3072
   * @see    http://hsivonen.iki.fi/php-utf8/
3073
   *
3074
   * @param string $str    <p>The string to be checked.</p>
3075
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3076
   *
3077
   * @return bool
3078
   */
3079 61
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is reported as valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: anything that is_utf16() / is_utf32() recognises is rejected.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this branch runs when pcre_utf8_support() does NOT return true,
    // yet it relies on the /u modifier -- confirm that the condition is not inverted.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte length is needed; with mbstring function overloading the '8BIT'
    // variant of mb_strlen() is used instead of strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 !== (0xC0 & $in)) {
          // Not a continuation octet: incomplete multi-octet sequence.
          return false;
        }

        // Legal continuation: merge its six payload bits into the code point.
        $shift = ($mState - 1) * 6;
        $mUcs4 |= ($in & 0x0000003F) << $shift;

        /**
         * End of the multi-octet sequence. mUcs4 now contains the final
         * Unicode code point to be output
         */
        if (0 === --$mState) {
          // Check for illegal sequences and code points.
          if (
              // From Unicode 3.1, non-shortest form is illegal
              (2 === $mBytes && $mUcs4 < 0x0080) ||
              (3 === $mBytes && $mUcs4 < 0x0800) ||
              (4 === $mBytes && $mUcs4 < 0x10000) ||
              (4 < $mBytes) ||
              // From Unicode 3.2, surrogate characters are illegal.
              (($mUcs4 & 0xFFFFF800) === 0xD800) ||
              // Code points outside the Unicode range are illegal.
              ($mUcs4 > 0x10FFFF)
          ) {
            return false;
          }

          // initialize UTF8 cache
          $mState = 0;
          $mUcs4 = 0;
          $mBytes = 1;
        }
      }
    }

    // A non-zero state here means the input stopped in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"). Such input is not well-formed
    // UTF-8; it used to slip through as valid because the loop simply ended.
    return $mState === 0;
  }
3220
3221
  /**
3222
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3223
   * Decodes a JSON string
3224
   *
3225
   * @link http://php.net/manual/en/function.json-decode.php
3226
   *
3227
   * @param string $json    <p>
3228
   *                        The <i>json</i> string being decoded.
3229
   *                        </p>
3230
   *                        <p>
3231
   *                        This function only works with UTF-8 encoded strings.
3232
   *                        </p>
3233
   *                        <p>PHP implements a superset of
3234
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3235
   *                        only supports these values when they are nested inside an array or an object.
3236
   *                        </p>
3237
   * @param bool   $assoc   [optional] <p>
3238
   *                        When <b>TRUE</b>, returned objects will be converted into
3239
   *                        associative arrays.
3240
   *                        </p>
3241
   * @param int    $depth   [optional] <p>
3242
   *                        User specified recursion depth.
3243
   *                        </p>
3244
   * @param int    $options [optional] <p>
3245
   *                        Bitmask of JSON decode options. Currently only
3246
   *                        <b>JSON_BIGINT_AS_STRING</b>
3247
   *                        is supported (default is to cast large integers as floats)
3248
   *                        </p>
3249
   *
3250
   * @return mixed the value encoded in <i>json</i> in appropriate
3251
   * PHP type. Values true, false and
3252
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3253
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3254
   * <i>json</i> cannot be decoded or if the encoded
3255
   * data is deeper than the recursion limit.
3256
   */
3257 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // Run the input through self::filter() and force a string before decoding.
    $filtered = (string)self::filter($json);

    // "$options" is only handed to the native decoder when Bootup::is_php('5.4')
    // reports true; otherwise the three-argument form is used.
    if (Bootup::is_php('5.4') === true) {
      return json_decode($filtered, $assoc, $depth, $options);
    }

    return json_decode($filtered, $assoc, $depth);
  }
3269
3270
  /**
3271
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3272
   * Returns the JSON representation of a value.
3273
   *
3274
   * @link http://php.net/manual/en/function.json-encode.php
3275
   *
3276
   * @param mixed $value   <p>
3277
   *                       The <i>value</i> being encoded. Can be any type except
3278
   *                       a resource.
3279
   *                       </p>
3280
   *                       <p>
3281
   *                       All string data must be UTF-8 encoded.
3282
   *                       </p>
3283
   *                       <p>PHP implements a superset of
3284
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3285
   *                       only supports these values when they are nested inside an array or an object.
3286
   *                       </p>
3287
   * @param int   $options [optional] <p>
3288
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3289
   *                       <b>JSON_HEX_TAG</b>,
3290
   *                       <b>JSON_HEX_AMP</b>,
3291
   *                       <b>JSON_HEX_APOS</b>,
3292
   *                       <b>JSON_NUMERIC_CHECK</b>,
3293
   *                       <b>JSON_PRETTY_PRINT</b>,
3294
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3295
   *                       <b>JSON_FORCE_OBJECT</b>,
3296
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3297
   *                       constants is described on
3298
   *                       the JSON constants page.
3299
   *                       </p>
3300
   * @param int   $depth   [optional] <p>
3301
   *                       Set the maximum depth. Must be greater than zero.
3302
   *                       </p>
3303
   *
3304
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3305
   */
3306 2 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // Run the value through self::filter() before encoding.
    $filtered = self::filter($value);

    // "$depth" is only handed to the native encoder when Bootup::is_php('5.5')
    // reports true; otherwise the two-argument form is used.
    if (Bootup::is_php('5.5') === true) {
      return json_encode($filtered, $options, $depth);
    }

    return json_encode($filtered, $options);
  }
3318
3319
  /**
3320
   * Makes string's first char lowercase.
3321
   *
3322
   * @param string $str <p>The input string</p>
3323
   * @param string  $encoding  [optional] <p>Set the charset.</p>
3324
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3325
   *
3326
   * @return string <p>The resulting string</p>
3327
   */
3328 7 View Code Duplication
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Everything after the first character; a false result from substr() is
    // replaced by an empty tail.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($tail === false) {
      $tail = '';
    }

    // The first character on its own, cast to string and then lower-cased.
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($head, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3343
3344
  /**
   * Alias of "UTF8::lcfirst()": lowercase the first character of a word.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word      <p>The word to process.</p>
   * @param string  $encoding  [optional] <p>Passed through to lcfirst().</p>
   * @param boolean $cleanUtf8 [optional] <p>Passed through to lcfirst().</p>
   *
   * @return string <p>Whatever lcfirst() returns for the same arguments.</p>
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    return self::lcfirst($word, $encoding, $cleanUtf8);
  }
3359
3360
  /**
   * Lowercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-excluded chunk is
   * passed through "UTF8::lcfirst()" and the chunks are glued back together
   * without any separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The processed string, or an empty string for empty input.</p>
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Only a really empty string counts as "no input": "0" is valid text
    // and must not be swallowed by a loose truthiness check.
    if (!isset($str[0])) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Skip empty chunks only; a literal "0" has to survive in the result.
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3409
3410
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without a character list, every leading character of the Unicode
   * categories "Z" (separators) and "C" (control / other) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; INF or an empty value selects the default.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but it is a perfectly valid character
    // list, so it must not fall back to the default whitespace trimming.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3433
3434
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,<br />
   *                or an empty string if the input contains no code points at all.
   *                </p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // max() on an empty list triggers a warning (PHP < 8) or throws a
    // ValueError (PHP >= 8), so bail out before calling it.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3449
3450
  /**
   * Determine the widest character of the string, measured in bytes.
   *
   * The per-character byte sizes come from "UTF8::chr_size_list()".
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>The largest byte size found, or 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3467
3468
  /**
   * Checks whether the mbstring extension is loaded.
   *
   * Side effect: when the extension is available, the mbstring internal
   * encoding is switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring')) {
      \mb_internal_encoding('UTF-8');

      return true;
    }

    return false;
  }
3483
3484
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,<br />
   *                or an empty string if the input contains no code points at all.
   *                </p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // min() on an empty list triggers a warning (PHP < 8) or throws a
    // ValueError (PHP >= 8), so bail out before calling it.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3499
3500
  /**
   * Deprecated camelCase alias of "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Passed through to normalize_encoding().</p>
   *
   * @return string <p>Whatever normalize_encoding() returns for the same arguments.</p>
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    return self::normalize_encoding($encoding, $fallback);
  }
3516
3517
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input -> $fallback; the literal "UTF-8" or a name
   * listed in self::$ICONV_ENCODING -> returned unchanged; a previously
   * resolved name -> cached result; otherwise the upper-cased name is mapped
   * through a small alias table (falling back to the upper-cased name itself).
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8, returned as-is when $encoding is empty.</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    // per-request memo: original spelling => normalized name
    static $RESOLVED_NAMES = array();

    if (!$encoding) {
      return $fallback;
    }

    // fast paths: already canonical names are handed back untouched
    if (
        'UTF-8' === $encoding
        ||
        in_array($encoding, self::$ICONV_ENCODING, true)
    ) {
      return $encoding;
    }

    if (isset($RESOLVED_NAMES[$encoding])) {
      return $RESOLVED_NAMES[$encoding];
    }

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $upperName = strtoupper($encoding);

    // the alias table is keyed without punctuation ("UTF-8" => "UTF8")
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    $normalized = isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upperName;

    $RESOLVED_NAMES[$encoding] = $normalized;

    return $normalized;
  }
3574
3575
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by the
   * value stored for it.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string <p>The normalized string, or an empty string for empty input.</p>
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are derived from the table once and then reused
    static $MSWORD_SEARCH = null;
    static $MSWORD_REPLACE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($MSWORD_SEARCH === null) {
      $MSWORD_SEARCH = array_keys(self::$UTF8_MSWORD);
      $MSWORD_REPLACE = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($MSWORD_SEARCH, $MSWORD_REPLACE, $str);
  }
3600
3601
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by
   * a plain space; unless asked otherwise, the entries of
   * self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string <p>The normalized string, or an empty string for empty input.</p>
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key has to follow the very same strict check that decides how
    // the table is built. Keying on a loose (int) cast would let a truthy
    // non-bool argument (e.g. 1) store the "replace NBSP" table under the
    // "keep NBSP" key and poison every later call that passes a real true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $WHITESPACE_CACHE[$cacheKey] = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($WHITESPACE_CACHE[$cacheKey]['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($WHITESPACE_CACHE[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3646
3647
  /**
   * Remove every whitespace character from the string.
   *
   * Matching is done with the POSIX class [[:space:]] in PCRE unicode mode
   * (/u), so tabs and newlines are removed as well as plain spaces.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without whitespace, or an empty string for empty input.</p>
   */
  public static function strip_whitespace($str)
  {
    $str = (string)$str;

    return $str === '' ? '' : (string)preg_replace('/[[:space:]]+/u', '', $str);
  }
3666
3667
  /**
   * Format a number with grouped thousands.
   *
   * When both separators are longer than one byte (and the Bootup version
   * check passes), the number is formatted with "." / "," first and the
   * separators are swapped in afterwards.
   *
   * @param float  $number        <p>The number to format.</p>
   * @param int    $decimals      [optional] <p>Number of decimal digits.</p>
   * @param string $dec_point     [optional] <p>Separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>Thousands separator.</p>
   *
   * @return string <p>The formatted number.</p>
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    if (
        isset($thousands_sep[1], $dec_point[1])
        &&
        Bootup::is_php('5.4') === true
    ) {
      // strtr() swaps both placeholders in a single pass. A sequential
      // str_replace() would re-replace a "," that is part of the freshly
      // inserted $dec_point with $thousands_sep and corrupt the output.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3705
3706
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * If "\IntlChar" is flagged as supported and yields a truthy result, that
   * result is returned; otherwise the leading bytes are decoded by hand and
   * the result is memoized per input string.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8; other encodings are converted to UTF-8 first.</p>
   *
   * @return int <p>The code point computed from (at most) the first four bytes of the character.</p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // only convert if the normalized name is still something else
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlResult = \IntlChar::ord($chr);
      if ($intlResult) {
        return $intlResult;
      }
    }

    // manual decoding below, memoized per input string
    static $CHAR_CACHE = array();
    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // 1-based list of the first (up to) four byte values
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing decodable): the lead byte itself
      $codePoint = $lead;
    }

    $CHAR_CACHE[$chr] = $codePoint;

    return $codePoint;
  }
3768
3769
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike "parse_str()", this method never creates variables in the
   *          current scope; the parsed data always goes into $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p><strong>false</strong> if parsing failed or $result ended up empty, <strong>true</strong> otherwise.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3797
3798
  /**
   * Checks if the "u" modifier (Unicode support in PCRE) is usable.
   *
   * An empty pattern with the /u modifier is matched against an empty
   * string; errors are silenced on purpose so that a missing feature
   * only results in "false".
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    /** @noinspection UsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3809
3810
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point in this order: decimal digits
   * are taken as a number, hexadecimal digits go through
   * "UTF8::hex_to_int()", anything else goes through "UTF8::ord()".
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters of the range; empty if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start first, then end; stop at the first unusable boundary
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($bounds[0], $bounds[1]));
  }
3856
3857
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * One decoding round is: to_utf8() -> html_entity_decode() ->
   * rawurldecode() -> fix_simple_utf8(). With $multi_decode the round is
   * repeated until the string stops changing.
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode [optional] <p>Decode as often as possible.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into numeric html entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $before = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $before !== $str);

    return (string)$str;
  }
3907
3908
  /**
   * Deprecated camelCase alias of "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever remove_bom() returns for the same argument.</p>
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    return self::remove_bom($str);
  }
3923
3924
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM bytes => byte length) is tested against the
   * start of the string, byte-wise; a matching prefix is cut off.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM, or an empty string for empty input.</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // not a prefix of the (current) string: try the next BOM
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
3951
3952
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what is collapsed into a single $what,
   * e.g. ("a  b   c", " ") => "a b c".
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what [optional] <p>String(s) whose repeated runs are collapsed.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Replace with the captured group instead of the raw item: the item
        // may contain "\0", "$1" and the like, which preg_replace() would
        // read as back-references inside a replacement string. Group 1 always
        // holds the last repetition, which is exactly one copy of the item.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3975
3976
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Newline (dec 10), carriage return (dec 13) and horizontal tab (dec 09)
   * are kept. The replacement is repeated until nothing matches any more, so
   * sequences that only appear after a removal are caught as well.
   *
   * copy&past from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url-encoded form (%00, %0B, %7F, ...).</p>
   * @param string $replacement [optional] <p>What every removed sequence is replaced with.</p>
   *
   * @return string <p>The cleaned string.</p>
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits of a percent-encoding are case-insensitive, so "%0B" has
      // to be caught exactly like "%0b" - hence the "i" modifier.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4009
4010
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * With $processInvalidUtf8 the string is first re-encoded from UTF-8 to
   * UTF-8 via mbstring, using the replacement (or "none" for an empty
   * replacement) as the mbstring substitute character; the previous
   * substitute character is restored afterwards.
   *
   * @param string $str                <p>The input string.</p>
   * @param string $replacementChar    [optional] <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 [optional] <p>Also handle invalid UTF-8 byte sequences.</p>
   *
   * @return string <p>The processed string, or an empty string for empty input.</p>
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // mbstring wants the keyword "none" instead of an empty substitute
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($search, $replace, $str);
  }
4056
4057
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * Without a character list, every trailing character of the Unicode
   * categories "Z" (separators) and "C" (control / other) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; INF or an empty value selects the default.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // The string "0" is falsy in PHP but it is a perfectly valid character
    // list, so it must not fall back to the default whitespace trimming.
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4080
4081
  /**
   * Build a regex fragment that matches any single character out of $s.
   *
   * The characters are collected into one bracket expression; a "-" is moved
   * to the front of the bracket, chunks of up to two bytes are escaped via
   * preg_quote(), longer single characters are appended as-is and anything
   * else becomes its own alternative. Results are memoized per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>Either the bracket expression itself or a non-capturing group of alternatives.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $PATTERN_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($PATTERN_CACHE[$cacheKey])) {
      return $PATTERN_CACHE[$cacheKey];
    }

    // index 0: content of the bracket expression, index 1..n: extra alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($char[2])) {
        $parts[0] .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $parts[0] .= $char;
      } else {
        $parts[] = $char;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $PATTERN_CACHE[$cacheKey] = $pattern;

    return $pattern;
  }
4129
4130
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes one "name - value" line per entry of self::$SUPPORT. The name is
   * printed because the bare values (mostly booleans) are meaningless on
   * their own, and var_export() is used so that "false" does not show up as
   * an empty line.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $supportName => $supportValue) {
      echo $supportName . ' - ' . var_export($supportValue, true) . "\n<br>";
    }
  }
4143
4144
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * The number is whatever "UTF8::ord()" reports for the character.
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars [optional] <p>Set to <strong>true</strong> to return ASCII input unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    return sprintf('&#%s;', self::ord($char, $encoding));
  }
4175
4176
  /**
   * Split a string into an array of its Unicode characters (or chunks of characters).
   *
   * @param string  $str       <p>The string to split.</p>
   * @param int     $length    [optional] <p>Number of characters per array element, default: 1.</p>
   * @param boolean $cleanUtf8 [optional] <p>Run the string through UTF8::clean() before splitting.</p>
   *
   * @return string[] <p>The characters / chunks; an empty array for an empty input.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // PCRE with UTF-8 support: "." in /u mode matches one code point, /s includes newlines.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk the raw bytes and assemble well-formed sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the byte length is needed here, even if "strlen()" is overloaded by mbstring
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteCount = \mb_strlen($str, '8BIT');
      } else {
        $byteCount = strlen($str);
      }

      for ($i = 0; $i < $byteCount; $i++) {
        $lead = $str[$i];

        // single byte (ASCII)
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          continue;
        }

        // derive the expected sequence width from the lead byte,
        // but only if enough bytes are left in the string
        if (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {
          $width = 2;
        } elseif (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {
          $width = 3;
        } elseif (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {
          $width = 4;
        } else {
          // stray continuation byte or truncated sequence: the byte is dropped
          continue;
        }

        // every following byte must be a continuation byte (10xxxxxx),
        // otherwise only the lead byte is skipped
        $char = $lead;
        for ($k = 1; $k < $width; $k++) {
          if (($str[$i + $k] & "\xC0") !== "\x80") {
            continue 2;
          }
          $char .= $str[$i + $k];
        }

        $chars[] = $char;
        $i += $width - 1;
      }
    }

    // glue the single characters together into chunks of "$length" characters
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4300
4301
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally an "iconv()" round-trip.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // run each detector only once and reuse its result (1 => LE, 2 => BE)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the input from that encoding into
    // itself leaves the bytes untouched. The strings are compared directly, a
    // hash of both sides would only add work without changing the outcome.

    $strAsString = (string)$str;
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // a failed conversion ("false") is treated like an empty result
      if ((string)$roundTrip === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
4401
4402
  /**
   * Check if the string ends with the given substring (case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $tail === $needle;
  }
4430
4431
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many characters from the end as the needle is long
    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // "UTF8::substr()" can fail with "false", which must not be handed
    // over to the string comparison (same handling as in "str_ends_with()")
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4454
4455
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search terms are quoted and matched via PCRE (modifiers "u" and "i");
   * replacement values are inserted literally, so "$1" or "\1" are not
   * treated as back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       An empty search term never matches.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value, or an array of values that
   *                       are paired with the search terms by position.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one pattern per search term (no by-reference loop, so no
    // dangling reference is left behind)
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ($term === '') {
        // "^" can never be preceded by a character -> this pattern never matches
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // "preg_replace()" interprets "\" and "$" in the replacement as
    // back-references, so they are escaped to keep the plain semantics
    // of "str_ireplace()"
    $escape = function ($value) {
      return strtr((string)$value, array('\\' => '\\\\', '$' => '\\$'));
    };

    if (is_array($replace)) {
      $replacement = array_map($escape, $replace);
    } else {
      $replacement = $escape($replace);
    }

    $replaced = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4498
4499
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix if its first occurrence is at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4522
4523
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max. number of characters that are taken from the input.</p>
   * @param string $strAddOn <p>Suffix that is appended if the string was shortened.</p>
   *
   * @return string <p>The unchanged input if it is short enough, otherwise the shortened string plus add-on.</p>
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough, nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls onto a space: cut right in front of it
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut at the limit and drop the last (possibly incomplete) word
    $truncated = (string)self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // no space inside of the limit: fall back to a hard cut
    if ($withoutLastWord === '') {
      return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4563
4564
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if "$pad_length" is not an integer, is not
   * greater than zero, is smaller than the length of "$str" or if
   * "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // without a pad string there is nothing to pad with
    // (and the divisions below would divide by zero)
    if ($ps_length < 1) {
      return $str;
    }

    // number of characters that are missing
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // with an odd difference the extra character goes to the right side;
        // the division has to happen before the integer cast
        $preLength = (int)floor($diff / 2);
        $postLength = $diff - $preLength;

        $pre = str_repeat($pad_string, (int)ceil($preLength / $ps_length));
        $pre = (string)self::substr($pre, 0, $preLength);
        $post = str_repeat($pad_string, (int)ceil($postLength / $ps_length));
        $post = (string)self::substr($post, 0, $postLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4618
4619
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>The string to be repeated.</p>
   * @param int    $multiplier <p>
   *                           Number of times the input string should be repeated,
   *                           it has to be greater than or equal to 0.
   *                           With 0 an empty string is returned.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4643
4644
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle, or an array of needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value, or an array of replacement values.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack. If it is an array, every entry is processed
   *                       and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>A string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4677
4678
  /**
   * Replace only the first occurrence of "$search" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The unchanged subject if the search term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $position = self::strpos($subject, $search);

    if ($position === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $position, $searchLength);
  }
4697
4698
  /**
   * Randomly reorder the characters of a string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // split into characters first, so multi-byte characters stay intact
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4713
4714
  /**
   * Sort all characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>If <strong>true</strong>, repeated characters are only kept once.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, the characters are sorted in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4739
4740
  /**
   * Split a string into an array of grapheme clusters (or chunks of them).
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of grapheme clusters per array element.</p>
   *
   * @return array <p>
   *               An empty array for an empty input; for a "$len" smaller than 1 the
   *               call is delegated to the native "str_split()".
   *               </p>
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      // let the native function deal with the invalid length
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // join every "$len" clusters into one element
    $chunks = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $chunks[] = implode('', $group);
    }

    return $chunks;
  }
4784
4785
  /**
   * Check if the string starts with the given substring (case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix if its first occurrence is at position 0
    return self::strpos($haystack, $needle) === 0;
  }
4808
4809
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number which is
   * written in base 2, without leading zeros.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1"; "0" for an empty input or an input of only NUL-bytes.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '0';
    }

    // Convert byte by byte: "base_convert()" works with floats for large
    // numbers and silently loses precision for longer strings.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // a number has no leading zeros
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4824
4825
  /**
   * Convert a string into an array of words.
   *
   * Without any filter the result alternates between the text between the
   * words and the words themselves, because the words are captured delimiters.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove values that are empty after "trim()".</p>
   * @param null|int $removeShortValues <p>Remove values that are not longer than this number of characters.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested: hand back the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isBlank = $removeEmptyValues === true && trim($part) === '';
      if ($isBlank) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4886
4887
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4902
4903
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words or the words themselves, depending on "$format".</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are found at the odd indexes, the text between them at the even indexes
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    if ($format === 1) {
      $words = array();
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      $wordsByOffset = array();

      // the first word starts behind the leading non-word part
      $offset = self::strlen($parts[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $wordsByOffset[$offset] = $parts[$idx];
        $offset += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $wordsByOffset;
    }

    return ($total - 1) / 2;
  }
4946
4947
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(), both strings are
   *       case-folded via UTF8::strtocasefold() before they are compared.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4965
4966
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack      <p>The string to search in.</p>
   * @param string  $needle        <p>The string to look for.</p>
   * @param bool    $before_needle <p>Passed through to UTF8::strstr().</p>
   * @param string  $encoding      <p>Passed through to UTF8::strstr().</p>
   * @param boolean $cleanUtf8     <p>Passed through to UTF8::strstr().</p>
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4983
4984
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are not identical are compared in their NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings do not need to be normalized
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
5004
5005
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   [optional] <p>Start position, applied via UTF8::substr().</p>
   * @param int    $length   [optional] <p>Length of the part to look at, applied via UTF8::substr().</p>
   *
   * @return int|null <p>
   *                  The length of the segment, or null if "$charList" or the
   *                  (sub-)string is empty or the sub-string could not be extracted.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first char from the list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no char from the list was found, so the whole string counts
    return self::strlen($str);
  }
5041
5042
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack      <p>The string to search in.</p>
   * @param string  $needle        <p>The string to look for.</p>
   * @param bool    $before_needle <p>Passed through to UTF8::stristr().</p>
   * @param string  $encoding      <p>Passed through to UTF8::stristr().</p>
   * @param boolean $cleanUtf8     <p>Passed through to UTF8::stristr().</p>
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5059
5060
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    // every code point is converted via "UTF8::chr()", the order of the array is kept
    $chars = array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        $array
    );

    return implode('', $chars);
  }
5082
5083
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // the known BOMs are the keys of the lookup table, the values are not needed here
    foreach (array_keys(self::$BOM) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5100
5101
  /**
   * Strip HTML and PHP tags from a string + optionally clean invalid UTF-8.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped.
   *                                HTML comments and PHP tags are always stripped,
   *                                this can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
5135
5136
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found or one of the strings is empty.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5196
5197
  /**
5198
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5199
   *
5200
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5201
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5202
   * @param bool    $before_needle [optional] <p>
5203
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
5204
   *                               haystack before the first occurrence of the needle (excluding the needle).
5205
   *                               </p>
5206
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
5207
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5208
   *
5209
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
5210
   */
5211 17 View Code Duplication
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    // An empty haystack or needle can never produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    // Only mbstring can deal with non UTF-8 encodings here; warn if it is missing.
    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // The intl variant is restricted to UTF-8 input.
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // Vanilla fallback: lazily capture everything in front of the first
    // case-insensitive occurrence of the needle.
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    // Either hand back the part before the needle, or cut it off the haystack.
    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
5270
5271
  /**
5272
   * Get the string length, not the byte-length!
5273
   *
5274
   * @link     http://php.net/manual/en/function.mb-strlen.php
5275
   *
5276
   * @param string  $str       <p>The string being checked for length.</p>
5277
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5278
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5279
   *
5280
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
5281
   *             character counted as +1)</p>
5282
   */
5283 89
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The support map has to be initialised BEFORE the first read of
    // self::$SUPPORT below (the single-byte shortcut consults
    // 'mbstring_func_overload'); strpos() performs the check in the same order.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Single-byte encodings: the length is simply the number of bytes.
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        if (
            $encoding === 'CP850'
            &&
            self::$SUPPORT['mbstring_func_overload'] === false
        ) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str.
      $str = self::clean($str);
    }

    // Neither mbstring nor iconv is available, so a non UTF-8 encoding cannot be honoured.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Prefer mbstring, then iconv; both accept an explicit encoding.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    // Fallback via vanilla php: count every match of "any single character".
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // Nothing was counted: last resort is the "mb_"-function via polyfill.
    return \mb_strlen($str, $encoding);
  }
5373
5374
  /**
5375
   * Case insensitive string comparisons using a "natural order" algorithm.
5376
   *
5377
   * INFO: natural order version of UTF8::strcasecmp()
5378
   *
5379
   * @param string $str1 <p>The first string.</p>
5380
   * @param string $str2 <p>The second string.</p>
5381
   *
5382
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
5383
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
5384
   *             <strong>0</strong> if they are equal
5385
   */
5386 1
  public static function strnatcasecmp($str1, $str2)
  {
    // Case-fold both operands, then defer to the case-sensitive natural-order comparison.
    $foldedLeft = self::strtocasefold($str1);
    $foldedRight = self::strtocasefold($str2);

    return self::strnatcmp($foldedLeft, $foldedRight);
  }
5390
5391
  /**
5392
   * String comparisons using a "natural order" algorithm
5393
   *
5394
   * INFO: natural order version of UTF8::strcmp()
5395
   *
5396
   * @link  http://php.net/manual/en/function.strnatcmp.php
5397
   *
5398
   * @param string $str1 <p>The first string.</p>
5399
   * @param string $str2 <p>The second string.</p>
5400
   *
5401
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
5402
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
5403
   *             <strong>0</strong> if they are equal
5404
   */
5405 2
  public static function strnatcmp($str1, $str2)
  {
    // Identical strings are equal without any folding work.
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    // Otherwise compare the nat-folded forms with PHP's native strnatcmp().
    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5409
5410
  /**
5411
   * Case-insensitive string comparison of the first n characters.
5412
   *
5413
   * @link  http://php.net/manual/en/function.strncasecmp.php
5414
   *
5415
   * @param string $str1 <p>The first string.</p>
5416
   * @param string $str2 <p>The second string.</p>
5417
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5418
   *
5419
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5420
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5421
   *             <strong>0</strong> if they are equal
5422
   */
5423 1
  public static function strncasecmp($str1, $str2, $len)
  {
    // Case-fold both operands, then compare their first $len characters.
    $foldedLeft = self::strtocasefold($str1);
    $foldedRight = self::strtocasefold($str2);

    return self::strncmp($foldedLeft, $foldedRight, $len);
  }
5427
5428
  /**
5429
   * String comparison of the first n characters.
5430
   *
5431
   * @link  http://php.net/manual/en/function.strncmp.php
5432
   *
5433
   * @param string $str1 <p>The first string.</p>
5434
   * @param string $str2 <p>The second string.</p>
5435
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5436
   *
5437
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
5438
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
5439
   *             <strong>0</strong> if they are equal
5440
   */
5441 2
  public static function strncmp($str1, $str2, $len)
  {
    // Cut both inputs down to their first $len characters and compare the prefixes.
    $leftPrefix = (string)self::substr($str1, 0, $len);
    $rightPrefix = (string)self::substr($str2, 0, $len);

    return self::strcmp($leftPrefix, $rightPrefix);
  }
5448
5449
  /**
5450
   * Search a string for any of a set of characters.
5451
   *
5452
   * @link  http://php.net/manual/en/function.strpbrk.php
5453
   *
5454
   * @param string $haystack  <p>The string where char_list is looked for.</p>
5455
   * @param string $char_list <p>This parameter is case sensitive.</p>
5456
   *
5457
   * @return string|false String starting from the character found, or false if it is not found.
5458
   */
5459 1
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    // Without a haystack or a character list there is nothing to find.
    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // Look for the first character that belongs to the class built from $char_list.
    $hit = array();
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $hit)) {
      return false;
    }

    // Return the haystack from the matched character onwards.
    $start = strpos($haystack, $hit[0]);

    return substr($haystack, $start);
  }
5474
5475
  /**
5476
   * Find position of first occurrence of string in a string.
5477
   *
5478
   * @link http://php.net/manual/en/function.mb-strpos.php
5479
   *
5480
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
5481
   * @param string  $needle    <p>The string to find in haystack.</p>
5482
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
5483
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5484
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5485
   *
5486
   * @return int|false <p>
5487
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
5488
   *                   If needle is not found it returns false.
5489
   *                   </p>
5490
   */
5491 58
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support an integer $needle, so a non-negative
    // integer is converted via self::chr() first. This has to happen BEFORE
    // the string cast below, otherwise the strict type check can never
    // succeed (strrpos() uses the same order).
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Single-byte encoding: the native byte-wise strpos() is already correct.
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // Warn only when NEITHER extension is available: both mbstring and iconv
    // are able to handle a non UTF-8 encoding (same rule as in strlen()).
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the haystack. Translate it into
    // an absolute start position so the returned index is relative to the
    // whole haystack and not to the tail that is searched.
    if ($offset < 0) {
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    $haystackTmp = self::substr($haystack, $offset);
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // Byte-wise search in the remaining part ...
    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... then convert the byte position back into a character position.
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5611
5612
  /**
5613
   * Finds the last occurrence of a character in a string within another.
5614
   *
5615
   * @link http://php.net/manual/en/function.mb-strrchr.php
5616
   *
5617
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5618
   * @param string $needle        <p>The string to find in haystack</p>
5619
   * @param bool   $before_needle [optional] <p>
5620
   *                              Determines which portion of haystack
5621
   *                              this function returns.
5622
   *                              If set to true, it returns all of haystack
5623
   *                              from the beginning to the last occurrence of needle.
5624
   *                              If set to false, it returns all of haystack
5625
   *                              from the last occurrence of needle to the end,
5626
   *                              </p>
5627
   * @param string $encoding      [optional] <p>
5628
   *                              Character encoding name to use.
5629
   *                              If it is omitted, internal character encoding is used.
5630
   *                              </p>
5631
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5632
   *
5633
   * @return string|false The portion of haystack or false if needle is not found.
5634
   */
5635 1 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Anything other than the literal "UTF-8" goes through the encoding normaliser.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Rely on the "mb_" function (native or polyfill).
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
5651
5652
  /**
5653
   * Reverses characters order in the string.
5654
   *
5655
   * @param string $str The input string
5656
   *
5657
   * @return string The string with characters in the reverse sequence
5658
   */
5659 4
  public static function strrev($str)
  {
    $input = (string)$str;

    // An empty string reversed is still an empty string.
    if ($input === '') {
      return '';
    }

    // Split via self::split(), flip the resulting pieces and glue them back together.
    $pieces = self::split($input);
    $reversed = array_reverse($pieces);

    return implode('', $reversed);
  }
5669
5670
  /**
5671
   * Finds the last occurrence of a character in a string within another, case insensitive.
5672
   *
5673
   * @link http://php.net/manual/en/function.mb-strrichr.php
5674
   *
5675
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5676
   * @param string  $needle        <p>The string to find in haystack.</p>
5677
   * @param bool    $before_needle [optional] <p>
5678
   *                               Determines which portion of haystack
5679
   *                               this function returns.
5680
   *                               If set to true, it returns all of haystack
5681
   *                               from the beginning to the last occurrence of needle.
5682
   *                               If set to false, it returns all of haystack
5683
   *                               from the last occurrence of needle to the end,
5684
   *                               </p>
5685
   * @param string  $encoding      [optional] <p>
5686
   *                               Character encoding name to use.
5687
   *                               If it is omitted, internal character encoding is used.
5688
   *                               </p>
5689
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5690
   *
5691
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
5692
   */
5693 1 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Anything other than the literal "UTF-8" goes through the encoding normaliser.
    $charset = $encoding !== 'UTF-8' ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Rely on the "mb_" function (native or polyfill).
    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5708
5709
  /**
5710
   * Find position of last occurrence of a case-insensitive string.
5711
   *
5712
   * @param string  $haystack  <p>The string to look in.</p>
5713
   * @param string|int $needle <p>The string to look for.<br />Or a code point as int.</p>
5714
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5715
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5716
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5717
   *
5718
   * @return int|false <p>
5719
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
5720
   *                   not found, it returns false.
5721
   *                   </p>
5722
   */
5723 1
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is converted via self::chr() before the
    // string cast; this check has to see the original (uncast) value.
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // \mb_strripos && iconv_strripos are not tolerant to invalid characters.
    // INFO: "$encoding === true" is only a fallback for old versions.
    $mustClean = $cleanUtf8 === true || $encoding === true;
    if ($mustClean) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // A boolean $encoding is tolerated as a legacy fallback and means "UTF-8".
    $encoding = ($encoding === 'UTF-8' || is_bool($encoding))
        ? 'UTF-8'
        : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    // Only mbstring can deal with non UTF-8 encodings here; warn if it is missing.
    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // The intl variant is restricted to UTF-8 input.
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // Fallback via vanilla php: upper-case both sides and run the
    // case-sensitive reverse search.
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5789
5790
  /**
5791
   * Find position of last occurrence of a string in a string.
5792
   *
5793
   * @link http://php.net/manual/en/function.mb-strrpos.php
5794
   *
5795
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5796
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
5797
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5798
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5799
   *                              the end of the string.
5800
   *                              </p>
5801
   * @param string     $encoding  [optional] <p>Set the charset.</p>
5802
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5803
   *
5804
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
5805
   *                   is not found, it returns false.</p>
5806
   */
5807 10
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is converted via self::chr() before the
    // string cast; this check has to see the original (uncast) value.
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // \mb_strrpos && iconv_strrpos are not tolerant to invalid characters.
    // INFO: "$encoding === true" is only a fallback for old versions.
    $mustClean = $cleanUtf8 === true || $encoding === true;
    if ($mustClean) {
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // A boolean $encoding is tolerated as a legacy fallback and means "UTF-8".
    $encoding = ($encoding === 'UTF-8' || is_bool($encoding))
        ? 'UTF-8'
        : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    // Only mbstring can deal with non UTF-8 encodings here; warn if it is missing.
    if ($hasMbstring === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // The intl variant is restricted to UTF-8 input.
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset !== 0) {
      if ($offset > 0) {
        // Positive offset: skip the leading characters, keep $offset to re-add it later.
        $slice = self::substr($haystack, $offset);
      } else {
        // Negative offset: drop the trailing characters; positions stay absolute.
        $slice = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      if ($slice !== null) {
        $haystack = $slice === false ? '' : (string)$slice;
      }
    }

    // Byte-wise reverse search ...
    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // ... then convert the byte position back into a character position.
    return $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
5892
5893
  /**
5894
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5895
   * mask.
5896
   *
5897
   * @param string $str    <p>The input string.</p>
5898
   * @param string $mask   <p>The mask of chars</p>
5899
   * @param int    $offset [optional]
5900
   * @param int    $length [optional]
5901
   *
5902
   * @return int
5903
   */
5904 10
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // Restrict the subject to the requested window first, if one was given.
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = $slice === false ? '' : (string)$slice;
    }

    $subject = (string)$str;
    if ($subject === '' || !isset($mask[0])) {
      return 0;
    }

    // Match the longest leading run made up of characters from the mask.
    $hit = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $subject, $hit)) {
      return 0;
    }

    // The result is the length of that run in characters.
    return self::strlen($hit[0]);
  }
5921
5922
  /**
5923
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5924
   *
5925
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5926
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5927
   * @param bool    $before_needle [optional] <p>
5928
   *                               If <b>TRUE</b>, strstr() returns the part of the
5929
   *                               haystack before the first occurrence of the needle (excluding the needle).
5930
   *                               </p>
5931
   * @param string  $encoding      [optional] <p>Set the charset.</p>
5932
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5933
   *
5934
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5935
   */
5936 2 View Code Duplication
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Both operands must be non-empty, otherwise there is nothing to find.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // Invalid bytes in front of the needle would shift the reported position,
      // so both operands are cleaned before searching.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only mbstring can work with non UTF-8 charsets; warn, but still try the fallbacks below.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: intl (UTF-8 only, guarded by the PHP 5.4 check)
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first occurrence of the needle.
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
5994
5995
  /**
5996
   * Unicode transformation for case-less matching.
5997
   *
5998
   * @link http://unicode.org/reports/tr21/tr21-5.html
5999
   *
6000
   * @param string  $str       <p>The input string.</p>
6001
   * @param bool    $full      [optional] <p>
6002
   *                           <b>true</b>, replace full case folding chars (default)<br />
6003
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
6004
   *                           </p>
6005
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6006
   *
6007
   * @return string
6008
   */
6009 13
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // per-request caches for the folding tables
    static $foldSearch = null;
    static $foldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Split the common folding map once into search / replace lists for str_replace().
    if ($foldSearch === null) {
      $foldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $foldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($foldSearch, $foldReplace, $str);

    if ($full) {
      // The full folding table is loaded on first use only.
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Lower-casing is the last step of the folding.
    return self::strtolower($str);
  }
6046
6047
  /**
6048
   * Make a string lowercase.
6049
   *
6050
   * @link http://php.net/manual/en/function.mb-strtolower.php
6051
   *
6052
   * @param string      $str       <p>The string being lowercased.</p>
6053
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6054
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6055
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6056
   *
6057
   * @return string str with all alphabetic characters converted to lowercase.
6058
   */
6059 25 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid UTF-8 sequences before the conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Without a language hint the plain "mb_"-function is all we need.
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $canTransliterate = self::$SUPPORT['intl'] === true
                        &&
                        Bootup::is_php('5.4') === true;

    if (!$canTransliterate) {
      // Language specific rules need intl; warn and use the generic conversion instead.
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    // Use the language specific transliterator if it is known, otherwise the generic one.
    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6104
6105
  /**
6106
   * Generic case sensitive transformation for collation matching.
6107
   *
6108
   * @param string $str <p>The input string</p>
6109
   *
6110
   * @return string
6111
   */
6112 3
  private static function strtonatfold($str)
  {
    // Decompose first (NFD), so that combining marks become separate code points ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... and then drop every run of non-spacing marks (\p{Mn}).
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6117
6118
  /**
6119
   * Make a string uppercase.
6120
   *
6121
   * @link http://php.net/manual/en/function.mb-strtoupper.php
6122
   *
6123
   * @param string      $str       <p>The string being uppercased.</p>
6124
   * @param string      $encoding  [optional] <p>Set the charset.</p>
6125
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6126
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6127
   *
6128
   * @return string str with all alphabetic characters converted to uppercase.
6129
   */
6130 19 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid UTF-8 sequences before the conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // Use the language specific transliterator if it is known, otherwise the generic one.
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // Language specific rules need intl; warn and fall through to the generic conversion.
      // (The warning previously named "strtolower()", which pointed users at the wrong method.)
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6174
6175
  /**
6176
   * Translate characters or replace sub-strings.
6177
   *
6178
   * @link  http://php.net/manual/en/function.strtr.php
6179
   *
6180
   * @param string          $str  <p>The string being translated.</p>
6181
   * @param string|string[] $from <p>The string replacing from.</p>
6182
   * @param string|string[] $to   <p>The string being translated to.</p>
6183
   *
6184
   * @return string <p>
6185
   *                This function returns a copy of str, translating all occurrences of each character in from to the
6186
   *                corresponding character in to.
6187
   *                </p>
6188
   */
6189 1
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Translating something into itself is a no-op.
    if ($from === $to) {
      return $str;
    }

    // INF is the "no third argument" marker: $from is then either a
    // replace-pairs array or a string whose occurrences are removed.
    if (INF !== $to) {
      // Only strings are split into characters; arrays are already lists
      // of characters / sub-strings and must not be passed to str_split().
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs lists of equal size, so the longer one is truncated.
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // build the replace-pairs map: from-char => to-char
      $from = array_combine($from, $to);
    }

    // two-argument form with a plain string: remove every occurrence of it
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6222
6223
  /**
6224
   * Return the width of a string.
6225
   *
6226
   * @param string  $str       <p>The input string.</p>
6227
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6228
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6229
   *
6230
   * @return int
6231
   */
6232 1
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding,
      // so broken byte sequences are removed up front
      $str = self::clean($str);
    }

    // Only non-default charsets need to be normalized.
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    // "mb_"-function, provided via polyfill if the extension is missing
    return \mb_strwidth($str, $charset);
  }
6247
6248
  /**
6249
   * Changes all keys in an array.
6250
   *
6251
   * @param array $array <p>The array to work on</p>
6252
   * @param int $case [optional] <p> Either <strong>CASE_UPPER</strong><br />
6253
   *                  or <strong>CASE_LOWER</strong> (default)</p>
6254
   *
6255
   * @return array|false <p>An array with its keys lower or uppercased, or false if
6256
   *                     input is not an array.</p>
6257
   */
6258 1
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // Every value other than CASE_LOWER (including unknown ones) is handled as CASE_UPPER.
    $toLower = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $originalKey => $value) {
      $convertedKey = $toLower ? self::strtolower($originalKey) : self::strtoupper($originalKey);

      // Keys that collide after the conversion overwrite each other; the last one wins.
      $result[$convertedKey] = $value;
    }

    return $result;
  }
6285
6286
  /**
6287
   * Get part of a string.
6288
   *
6289
   * @link http://php.net/manual/en/function.mb-substr.php
6290
   *
6291
   * @param string  $str       <p>The string being checked.</p>
6292
   * @param int     $offset    <p>The first position used in str.</p>
6293
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
6294
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6295
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6296
   *
6297
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6298
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6299
   *                      characters long, <b>FALSE</b> will be returned.</p>
6300
   */
6301 76
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // The character count is only needed for the bounds check and as default length.
    $totalChars = ($offset || $length === null) ? (int)self::strlen($str, $encoding) : 0;

    // An offset behind the end of the string cannot be satisfied.
    if ($offset && $offset > $totalChars) {
      return false;
    }

    $length = ($length === null) ? $totalChars : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    $isUtf8 = $encoding === 'UTF-8'
              ||
              $encoding === true || $encoding === false;
    $encoding = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut byte-wise, as long as "substr" is not overloaded by mbstring.
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    // Only mbstring can work with non UTF-8 charsets; warn, but still try the fallbacks below.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // 2nd choice: intl (UTF-8 only, guarded by the PHP 5.4 check)
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_substr($str, $offset, $length);
    }

    // 3rd choice: iconv ("iconv_substr()" can't handle negative length)
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    // fallback via vanilla php:
    // split into an array of characters (invalid characters are removed) ...
    $chars = self::split($str);

    // ... extract the relevant part, and join it to make a string again
    return implode('', array_slice($chars, $offset, $length));
  }
6391
6392
  /**
6393
   * Binary safe comparison of two strings from an offset, up to length characters.
6394
   *
6395
   * @param string  $str1               <p>The main string being compared.</p>
6396
   * @param string  $str2               <p>The secondary string being compared.</p>
6397
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
6398
   *                                    counting from the end of the string.</p>
6399
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
6400
   *                                    the length of the str compared to the length of main_str less the offset.</p>
6401
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6402
   *                                    insensitive.</p>
6403
   *
6404
   * @return int <p>
6405
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
6406
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
6407
   *             <strong>0</strong> if they are equal.
6408
   *             </p>
6409
   */
6410 1
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $needsWindow = $offset !== 0
                   ||
                   $length !== null;

    if ($needsWindow) {
      // Cut the requested window out of the main string; a failed substr() (false) casts to ''.
      $str1 = (string)self::substr($str1, $offset, $length);

      // The second string is limited to the size of that window.
      $str2 = (string)self::substr($str2, 0, self::strlen($str1));
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6436
6437
  /**
6438
   * Count the number of substring occurrences.
6439
   *
6440
   * @link  http://php.net/manual/en/function.substr-count.php
6441
   *
6442
   * @param string  $haystack  <p>The string to search in.</p>
6443
   * @param string  $needle    <p>The substring to search for.</p>
6444
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
6445
   * @param int     $length    [optional] <p>
6446
   *                           The maximum length after the specified offset to search for the
6447
   *                           substring. It outputs a warning if the offset plus the length is
6448
   *                           greater than the haystack length.
6449
   *                           </p>
6450
   * @param string  $encoding  <p>Set the charset.</p>
6451
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6452
   *
6453
   * @return int|false <p>This function returns an integer or false if there isn't a string.</p>
6454
   */
6455 1
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Both operands must be non-empty.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      // Without an explicit length the whole haystack is the upper bound.
      $length = (int)(($length === null) ? self::strlen($haystack) : $length);
      $offset = (int)$offset;

      // output from "substr_count()" have changed in PHP 7.1
      $windowIsInvalid = ($length !== 0 && $offset !== 0)
                         &&
                         $length + $offset <= 0
                         &&
                         Bootup::is_php('7.1') === false;
      if ($windowIsInvalid) {
        return false;
      }

      // Search only inside the requested window; a failed substr() (false) casts to ''.
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid UTF-8 sequences from both operands before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only mbstring can work with non UTF-8 charsets; warn, but still try the fallback below.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback: count the matches of the quoted needle
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return count($occurrences);
  }
6526
6527
  /**
6528
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
6529
   *
6530
   * @param string $haystack <p>The string to search in.</p>
6531
   * @param string $needle   <p>The substring to search for.</p>
6532
   *
6533
   * @return string <p>Return the sub-string.</p>
6534
   */
6535 1 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // Nothing to strip: empty needle, or the haystack does not start with it (ignoring case).
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Skip as many characters as the needle has; a failed substr() (false) casts to ''.
    return (string)self::substr($haystack, self::strlen($needle));
  }
6559
6560
  /**
6561
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
6562
   *
6563
   * @param string $haystack <p>The string to search in.</p>
6564
   * @param string $needle   <p>The substring to search for.</p>
6565
   *
6566
   * @return string <p>Return the sub-string.</p>
6567
   */
6568 1 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // Nothing to strip: empty needle, or the haystack does not end with it (ignoring case).
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything in front of the suffix; a failed substr() (false) casts to ''.
    $keepChars = self::strlen($haystack) - self::strlen($needle);

    return (string)self::substr($haystack, 0, $keepChars);
  }
6592
6593
  /**
6594
   * Removes a prefix ($needle) from the start of the string ($haystack).
6595
   *
6596
   * @param string $haystack <p>The string to search in.</p>
6597
   * @param string $needle   <p>The substring to search for.</p>
6598
   *
6599
   * @return string <p>Return the sub-string.</p>
6600
   */
6601 1 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // Nothing to strip: empty needle, or the haystack does not start with it.
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Skip as many characters as the needle has; a failed substr() (false) casts to ''.
    return (string)self::substr($haystack, self::strlen($needle));
  }
6625
6626
  /**
6627
   * Replace text within a portion of a string.
6628
   *
6629
   * source: https://gist.github.com/stemar/8287074
6630
   *
6631
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
6632
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
6633
   * @param int|int[]       $offset           <p>
6634
   *                                          If start is positive, the replacing will begin at the start'th offset
6635
   *                                          into string.
6636
   *                                          <br /><br />
6637
   *                                          If start is negative, the replacing will begin at the start'th character
6638
   *                                          from the end of string.
6639
   *                                          </p>
6640
   * @param int|int[]|void  $length           [optional] <p>If given and is positive, it represents the length of the
6641
   *                                          portion of string which is to be replaced. If it is negative, it
6642
   *                                          represents the number of characters from the end of string at which to
6643
   *                                          stop replacing. If it is not given, then it will default to strlen(
6644
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6645
   *                                          length is zero then this function will have the effect of inserting
6646
   *                                          replacement into string at the given start offset.</p>
6647
   *
6648
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6649
   */
6650 7
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $count = count($str);

      // the replacement: one entry per input string
      $replacement = (is_array($replacement) === true)
          ? array_slice($replacement, 0, $count)
          : array_pad(array($replacement), $count, $replacement);

      // the offset: one entry per input string, non-integer entries fall back to 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $count);
        foreach ($offset as $idx => $singleOffset) {
          if ((int)$singleOffset !== $singleOffset) {
            $offset[$idx] = 0;
          }
        }
      } else {
        $offset = array_pad(array($offset), $count, $offset);
      }

      // the length: one entry per input string
      if (!isset($length)) {
        $length = array_fill(0, $count, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $count);
        foreach ($length as $idx => $singleLength) {
          if ($singleLength === null) {
            // missing entries mean "insert only"
            $length[$idx] = 0;
          } elseif ((int)$singleLength !== $singleLength) {
            // non-integer entries fall back to the number of input strings
            $length[$idx] = $count;
          }
        }
      } else {
        $length = array_pad(array($length), $count, $length);
      }

      // recursive call: process the strings one by one
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // A single string only takes a single replacement: use the first array entry.
    if (is_array($replacement) === true) {
      $replacement = (count($replacement) > 0) ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // Split both strings into characters, so that the splice below works on characters, not bytes.
    preg_match_all('/./us', $str, $strChars);
    preg_match_all('/./us', $replacement, $replacementChars);

    // Without a length everything up to the end of the string is replaced.
    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return implode('', $strChars[0]);
  }
6722
6723
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * The haystack is returned unchanged if the needle is empty or if the haystack does not end with it.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or an empty string for an empty haystack.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $stripped = self::substr($haystack, 0, $keepLength);

    return $stripped === false ? '' : (string)$stripped;
  }
6754
6755
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the character
   * (it already was upper-case or has no case), the lower-cased character is used instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // swaps the case of one matched character
    $swapChar = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapChar, $str);
  }
6798
6799
  /**
   * Alias of "UTF8::to_ascii()", all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr [optional] <p>Passed on as the "$unknown" argument of "UTF8::to_ascii()".</p>
   * @param bool   $strict    [optional] <p>Passed on as the "$strict" argument of "UTF8::to_ascii()".</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    return self::to_ascii($s, $subst_chr, $strict);
  }
6816
6817
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string, or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    return self::to_iso8859($str);
  }
6832
6833
  /**
   * Alias of "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string, or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    return self::to_latin1($str);
  }
6848
6849
  /**
   * Alias of "UTF8::to_utf8()", called without the optional html-entity decoding.
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string, or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    return self::to_utf8($str);
  }
6864
6865
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned as it is. Everything else is cleaned via "UTF8::clean()" and then
   * transliterated character by character with the help of lookup tables that are loaded on demand
   * via "UTF8::getData()". Characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache for the already loaded lookup tables, indexed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $str = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // Reset the code point for every character: without this, a character that can not be decoded
      // would silently re-use the code point of the previously decoded character.
      $ord = null;

      // Decode the code point from the lead byte and its continuation bytes. The isset()-checks
      // make sure that a truncated sequence ends up as "unknown" instead of reading missing bytes.
      if ($ordC0 >= 192 && $ordC0 <= 223 && isset($c[1])) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239 && isset($c[2])) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247 && isset($c[3])) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251 && isset($c[4])) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253 && isset($c[5])) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // stray continuation bytes (128-191), the bytes 254 / 255 and truncated sequences
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the lookup tables are split into banks of 256 code points
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the missing bank, so that it is not requested again
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference to the last element
    unset($c);

    return implode('', $chars);
  }
7025
7026
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      return array_map(array('\\voku\\helper\\UTF8', 'to_iso8859'), $str);
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
7055
7056
  /**
   * Alias of "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string, or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    return self::to_iso8859($str);
  }
7069
7070
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>A byte >= 0xC0 that is followed by the expected number of continuation bytes (0x80 - 0xBF)
   * is copied as it is, otherwise the single byte is converted into a two byte sequence.</li>
   * <li>A lone byte between 0x80 and 0xBF is replaced via the WINDOWS-1252 table if it is listed there,
   * otherwise it is converted into a two byte sequence, too.</li>
   * <li>Unicode escape sequences like "\u00fc" are decoded afterwards.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element
    if (is_array($str) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "strlen()" is overloaded by mbstring
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';
    $i = 0;

    while ($i < $max) {

      $c1 = $str[$i];

      if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        // number of continuation bytes that the lead byte announces (0 = no valid lead byte)
        if ($c1 <= "\xDF") {
          $trailing = 1;
        } elseif ($c1 <= "\xEF") {
          $trailing = 2;
        } elseif ($c1 <= "\xF7") {
          $trailing = 3;
        } else {
          $trailing = 0;
        }

        // collect the continuation bytes, bytes behind the end of the string count as "\x00"
        $tail = '';
        $looksLikeUtf8 = $trailing > 0;
        for ($k = 1; $looksLikeUtf8 === true && $k <= $trailing; $k++) {
          $cNext = $i + $k >= $max ? "\x00" : $str[$i + $k];
          if ($cNext >= "\x80" && $cNext <= "\xBF") {
            $tail .= $cNext;
          } else {
            $looksLikeUtf8 = false;
          }
        }

        if ($looksLikeUtf8 === true) { // yeah, almost sure it's UTF8 already
          $buf .= $c1 . $tail;
          $i += $trailing;
        } else { // not valid UTF8 - convert it
          $cc1 = self::chr_and_parse_int(ord($c1) / 64) | "\xC0";
          $cc2 = ($c1 & "\x3F") | "\x80";
          $buf .= $cc1 . $cc2;
        }

      } elseif (($c1 & "\xC0") === "\x80") { // needs conversion

        $ordC1 = ord($c1);
        if (isset(self::$WIN1252_TO_UTF8[$ordC1])) { // found in Windows-1252 special cases
          $buf .= self::$WIN1252_TO_UTF8[$ordC1];
        } else {
          $cc1 = self::chr_and_parse_int($ordC1 / 64) | "\xC0";
          $cc2 = ($c1 & "\x3F") | "\x80";
          $buf .= $cc1 . $cc2;
        }

      } else { // it doesn't need conversion
        $buf .= $c1;
      }

      $i++;
    }

    // decode unicode escape sequences, e.g. "\u00fc"
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7207
7208
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the original function if the string and the chars were pure 7-bit (ASCII),
   * but the check for ASCII costs more time than we could save here.
   *
   * If $chars is not given, or is a "falsy" value (e.g. an empty string), then all separator- and
   * control-characters (the unicode categories "Z" and "C") are stripped from both ends.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // custom characters: trim the left side first, then the right side
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7236
7237
  /**
   * Makes string's first char uppercase.
   *
   * The string is split into its first character and the rest; only the first character is
   * passed to "UTF8::strtoupper()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything behind the first character
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($rest === false) {
      $rest = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
7261
7262
  /**
   * Alias of "UTF8::ucfirst()", all arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    return self::ucfirst($word, $encoding, $cleanUtf8);
  }
7277
7278
  /**
   * Uppercase for all words in the string.
   *
   * The string is split via "UTF8::str_to_words()", the first character of every part that is not
   * listed in $exceptions is upper-cased via "UTF8::ucfirst()" and the parts are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "0" is falsy in PHP, but it is a valid (non-empty) input string
    if (!$str && $str !== '0') {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // Skip only real empty parts: a plain truthiness check would also drop the word "0"
      // and therefore remove it from the result.
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7327
7328
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" sequences into html-entities, so that the loop below can decode them
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode once, or (with $multi_decode) until the string does not change anymore
    do {
      $str_compare = $str;

      $utf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($utf8, $flags);
      $urlDecoded = urldecode($entitiesDecoded);
      $str = self::fix_simple_utf8($urlDecoded);

    } while ($multi_decode === true && $str_compare !== $str);

    return (string)$str;
  }
7378
7379
  /**
   * Returns an array that maps "urlencoded" win1252 sequences ("%20" - "%FF") to UTF-8 characters.
   *
   * NOTE(review): the entries that are shown as empty strings here (%7F, %81, %8D, %8F, %90, %9D, %A0, %AD)
   * may contain non-printable characters in the original file - please verify that before applying this.
   *
   * @deprecated use the "UTF8::urldecode()" function to decode a string
   *
   * @return array
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (it was mapped to a backtick before)
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7615
7616
  /**
   * Decodes a UTF-8 string to ISO-8859-1.
   *
   * The input is normalized via "UTF8::to_utf8()" first, then the sequences from the
   * "UTF8_TO_WIN1252"-table are replaced. After that, every two byte sequence is collapsed into
   * one byte, while three / four byte sequences and code points above 255 are replaced by "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    // the replacement table is split into keys and values only once per request
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "strlen()" is overloaded by mbstring
    $len = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    // The string is rewritten in place: "$write" never overtakes "$read",
    // so no byte is overwritten before it was read.
    $read = 0;
    $write = 0;
    while ($read < $len) {
      $leadBits = $str[$read] & "\xF0";

      if ($leadBits === "\xC0" || $leadBits === "\xD0") {
        // two byte sequence: combine the payload bits of both bytes
        $c = (ord($str[$read] & "\x1F") << 6) | ord($str[++$read] & "\x3F");
        $str[$write] = $c < 256 ? self::chr_and_parse_int($c) : '?';
      } elseif ($leadBits === "\xE0" || $leadBits === "\xF0") {
        // three / four byte sequences have no ISO-8859-1 counterpart
        if ($leadBits === "\xF0") {
          // a four byte sequence has one more byte to skip
          ++$read;
        }
        $str[$write] = '?';
        $read += 2;
      } else {
        // single byte: copy as it is
        $str[$write] = $str[$read];
      }

      ++$read;
      ++$write;
    }

    // only the first "$write" bytes belong to the result
    return (string)self::substr($str, 0, $write, '8BIT');
  }
7679
7680
  /**
   * Converts an ISO-8859-1 string to UTF-8.
   *
   * <p>
   * The conversion itself is done by the native "utf8_encode()"; if the result
   * contains a "\xC2" byte, the sequences listed in "self::$CP1252_TO_UTF8" are
   * replaced by their mapped values afterwards.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte the string is returned as it is
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // the search / replace lists are built on the first call and reused afterwards
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7716
7717
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7730
7731
  /**
   * Returns the table of all known UTF-8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               The whitespace characters as values, keyed by the type of whitespace
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7747
7748
  /**
   * Cuts a string down to a maximum number of words.
   *
   * <p>
   * A "word" is a run of non-whitespace characters. If the string holds more words
   * than allowed, the leading part is right-trimmed and "$strAddOn" is appended;
   * otherwise the string is returned unchanged.
   * </p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words; values below 1 yield an empty string.</p>
   * @param string $strAddOn <p>Text appended in place of the stripped remainder.</p>
   *
   * @return string
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $limit = (int)$limit;
    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by up to "$limit" words including their trailing whitespace
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // nothing matched: hand the input back untouched
    if (!isset($matches[0])) {
      return $str;
    }

    $head = $matches[0];

    // the match already covers the whole input, so there is nothing to cut off
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
7784
7785
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>
   * The input is translated into a single-byte "mask" ("?" per character, " " per space,
   * "#" per existing break), the native "wordwrap()" is run on that mask, and the original
   * characters are then replayed along the positions of the "#" markers.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column,
   *                or an empty string if "$str" or "$break" is empty.
   *                </p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // step 1: collect the characters and build the single-byte mask
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $segmentIndex => $segment) {

      // every segment but the first one was preceded by a break in the input
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // step 2: let the native function decide where the lines are broken
    $mask = wordwrap($mask, $width, '#', $cut);

    // step 3: replay the original characters along the "#" markers of the wrapped mask
    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($mask, '#', $breakPos + 1))) {

      // copy everything in front of the marker
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // the marker took the place of a break or a space: drop that character
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7852
7853
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7862
7863
}
7864