Completed
Push — master ( e4515f...29a5d9 )
by Lars
03:30
created

UTF8::str_istarts_with()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 15
Code Lines 8

Duplication

Lines 15
Ratio 100 %

Code Coverage

Tests 8
CRAP Score 3

Importance

Changes 0
Metric Value
dl 15
loc 15
ccs 8
cts 8
cp 1
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 8
nc 3
nop 2
crap 3
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
814
   * __construct()
815
   */
816 1
  public function __construct()
  {
    // Creating an instance only ensures the environment detection has run;
    // checkForSupport() itself skips the work once its "already checked" flag is set.
    self::checkForSupport();
  }
820
821
  /**
822
   * Return the character at the specified position: $str[1] like functionality.
823
   *
824
   * @param string $str <p>A UTF-8 string.</p>
825
   * @param int    $pos <p>The position of character to return.</p>
826
   *
827
   * @return string <p>Single Multi-Byte character.</p>
828
   */
829 3
  public static function access($str, $pos)
  {
    $input = (string)$str;

    // An empty string has no character to hand back.
    if ($input === '') {
      return '';
    }

    $index = (int)$pos;

    // Negative positions are not supported and yield an empty string.
    return $index < 0 ? '' : (string)self::substr($input, $index, 1);
  }
845
846
  /**
847
   * Prepends UTF-8 BOM character to the string and returns the whole string.
848
   *
849
   * INFO: If BOM already existed there, the Input string is returned.
850
   *
851
   * @param string $str <p>The input string.</p>
852
   *
853
   * @return string <p>The output string that contains BOM.</p>
854
   */
855 1
  public static function add_bom_to_string($str)
  {
    // Anything other than an explicit "false" from string_has_bom() means
    // the input is handed back untouched.
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
865
   * Convert a string of binary digits ("0" and "1") into a string.
866
   *
867
   * @param mixed $bin 1|0
868
   *
869
   * @return string
870
   */
871 1
  public static function binary_to_str($bin)
  {
    // Only non-empty strings can carry binary digits; everything else
    // (null, integers, arrays, ...) is treated as empty input.
    if (!is_string($bin) || $bin === '') {
      return '';
    }

    // base_convert() used to skip every character that is not a valid digit,
    // so keep doing that explicitly.
    $bits = preg_replace('/[^01]/', '', $bin);

    // Leading zero bits carry no value (base_convert() dropped them as well).
    $bits = ltrim((string)$bits, '0');

    // A value of zero was always returned as a single NUL byte.
    if ($bits === '') {
      return "\x00";
    }

    // Pad on the LEFT to a whole number of bytes. The previous
    // pack('H*', ...) approach padded an odd number of hex digits on the
    // right, turning e.g. "1" into "\x10" instead of "\x01".
    $missingBits = (8 - (strlen($bits) % 8)) % 8;
    if ($missingBits > 0) {
      $bits = str_repeat('0', $missingBits) . $bits;
    }

    // Convert byte by byte instead of via one huge number: base_convert()
    // loses precision once the value no longer fits into a native number,
    // which corrupted inputs longer than a few bytes.
    $result = '';
    foreach (str_split($bits, 8) as $byteBits) {
      $result .= chr(bindec($byteBits));
    }

    return $result;
  }
879
880
  /**
881
   * Returns the UTF-8 Byte Order Mark Character.
882
   *
883
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
884
   *
885
   * @return string UTF-8 Byte Order Mark
886
   */
887 2
  public static function bom()
  {
    // The three-byte UTF-8 byte order mark.
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
891
892
  /**
893
   * @alias of UTF8::chr_map()
894
   *
895
   * @see   UTF8::chr_map()
896
   *
897
   * @param string|array $callback
898
   * @param string       $str
899
   *
900
   * @return array
901
   */
902 1
  public static function callback($callback, $str)
  {
    // Pure alias: both arguments are forwarded to chr_map() unchanged.
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
908
   * This method will auto-detect your server environment for UTF-8 support.
909
   *
910
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
911
   */
912 4
  public static function checkForSupport()
  {
    // The detection result is cached in self::$SUPPORT, so it only runs once.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // String-function overloading is only flagged when the constant exists
    // and its bit is set in the "mbstring.func_overload" ini value.
    $stringOverloadActive = defined('MB_OVERLOAD_STRING')
                            &&
                            (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);
    self::$SUPPORT['mbstring_func_overload'] = $stringOverloadActive;

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // The transliterator id list is only fetched when "intl" is available
    // and the function exists; otherwise an empty list is stored.
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? transliterator_list_ids()
        : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
954
   * Generates a UTF-8 encoded character from the given code point.
955
   *
956
   * INFO: opposite to UTF8::ord()
957
   *
958
   * @param int    $code_point <p>The code point for which to generate a character.</p>
959
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
960
   *
961
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
962
   */
963 9
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // make sure the environment detection has filled self::$SUPPORT
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } elseif (self::$SUPPORT['intlChar'] === true) {
      // fast path: let "\IntlChar" do the work for plain UTF-8 requests
      return \IntlChar::chr($code_point);
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    $i = (int)$code_point;
    if ($i !== $code_point) {
      return null;
    }

    // Only U+0000 .. U+10FFFF can be encoded. Without this check negative
    // values and values above U+10FFFF were turned into invalid byte
    // sequences instead of the documented "null on failure".
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    // use static cache, only if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    // the separator keeps "<code point><encoding>" pairs unambiguous,
    // because some encoding names start with digits (e.g. "850")
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    // the character was built as UTF-8, convert it if another encoding was requested
    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1013
1014
  /**
1015
   * @param int $int
1016
   *
1017
   * @return string
1018
   */
1019 25
  private static function chr_and_parse_int($int)
  {
    // Cast first so that the native chr() always receives an integer.
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1023
1024
  /**
1025
   * Applies callback to all characters of a string.
1026
   *
1027
   * @param string|array $callback <p>The callback function.</p>
1028
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
1029
   *
1030
   * @return array <p>The outcome of callback.</p>
1031
   */
1032 1
  public static function chr_map($callback, $str)
  {
    // The pieces returned by split() are fed to the callback one by one;
    // array_map() collects the individual results.
    return array_map($callback, self::split($str));
  }
1038
1039
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character; empty for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    // an empty string has no characters to measure
    if ($str === '') {
      return array();
    }

    $byteLengths = array();
    foreach (self::split($str) as $index => $character) {
      // '8BIT' makes strlen() count bytes instead of characters
      $byteLengths[$index] = self::strlen($character, '8BIT');
    }

    return $byteLengths;
  }
1066
1067
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are read (1 to 4); the payload bits
   * of the lead byte and of each following byte are then joined, 6 bits at a time.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value; 0 for an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode: do not read string offset 0 of an empty string
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // a truncated sequence has no byte here: contribute zero payload bits
      // instead of reading an uninitialized string offset
      $payload = isset($char[$i - 1]) ? (self::ord($char[$i - 1]) & ~0x80) : 0;
      $code = ($code << 6) + $payload;
    }

    return $code;
  }
1106
1107
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed on to "UTF8::int_to_hex()".</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // the literal entity "&#0;" is handed to ord() as an empty string
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
1129
1130
  /**
   * Alias for "UTF8::chr_to_decimal()"; the argument is forwarded unchanged.
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr
   *
   * @return int
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1143
1144
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character(s).
   *
   * The chunks come from "UTF8::split()" and are glued together with "implode()",
   * so $end is placed between chunks and is not appended after the last one.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) used to join the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1157
1158
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order: strip stray bytes via regex, remove the diamond question mark,
   * remove invisible characters, then the optional whitespace / MS Word / BOM handling.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of byte sequences shaped like UTF-8 characters;
    // groups 2 and 3 match single stray bytes. Replacing every match with "$1"
    // keeps the runs and drops the stray bytes.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = preg_replace($utf8Pattern, '$1', $str);

    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    // optional steps; each flag must be strictly true to take effect
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1206
1207
  /**
   * Cleans up a string so that only printable UTF-8 chars remain at the end + fixes UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($str);

    // clean() arguments, in order:
    //   remove_bom = true, normalize_whitespace = true,
    //   normalize_msword = false, keep_non_breaking_space = true
    // clean() itself also removes non UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0")
    return (string)self::clean($repaired, true, true, false, true);
  }
1234
1235
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split first; anything else is mapped as given
    $characters = (is_string($arg) === true) ? self::split($arg) : $arg;

    $toCodePoint = array(
        '\\voku\\helper\\UTF8',
        'ord',
    );
    $codePoints = array_map($toCodePoint, $characters);

    if (!$u_style) {
      return $codePoints;
    }

    $toHex = array(
        '\\voku\\helper\\UTF8',
        'int_to_hex',
    );

    return array_map($toHex, $codePoints);
  }
1272
1273
  /**
   * Returns count of characters used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split with a chunk length of 1, then tally the pieces
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1286
1287
  /**
   * Converts an int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") and decoded
   * through "UTF8::html_entity_decode()".
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1304
1305
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The different to "UTF8::utf8_encode()" is that this function, try to fix also broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the input string when nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do for an empty string or an empty target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);
    $forced = ($force === true);

    // detection failed, or (when not forced) the string is already in the target encoding
    if ($detected === false || ($forced === false && $detected === $encoding)) {
      return $str;
    }

    // target UTF-8: use to_utf8() when forced or when the source is one of the listed encodings
    if (
        $encoding === 'UTF-8'
        &&
        (
            $forced
            || in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // target ISO-8859-1: use to_iso8859() when forced or when the source is ISO-8859-1 / UTF-8
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $forced
            || in_array($detected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // warn only; the conversion below is attempted regardless
    $needsMbstring = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // a falsy conversion result falls back to the input string
    return $converted ? $converted : $str;
  }
1395
1396
  /**
1397
   * Reads entire file into a string.
1398
   *
1399
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1400
   *
1401
   * @link http://php.net/manual/en/function.file-get-contents.php
1402
   *
1403
   * @param string        $filename      <p>
1404
   *                                     Name of the file to read.
1405
   *                                     </p>
1406
   * @param int|false     $flags         [optional] <p>
1407
   *                                     Prior to PHP 6, this parameter is called
1408
   *                                     use_include_path and is a bool.
1409
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1410
   *                                     to trigger include path
1411
   *                                     search.
1412
   *                                     </p>
1413
   *                                     <p>
1414
   *                                     The value of flags can be any combination of
1415
   *                                     the following flags (with some restrictions), joined with the
1416
   *                                     binary OR (|)
1417
   *                                     operator.
1418
   *                                     </p>
1419
   *                                     <p>
1420
   *                                     <table>
1421
   *                                     Available flags
1422
   *                                     <tr valign="top">
1423
   *                                     <td>Flag</td>
1424
   *                                     <td>Description</td>
1425
   *                                     </tr>
1426
   *                                     <tr valign="top">
1427
   *                                     <td>
1428
   *                                     FILE_USE_INCLUDE_PATH
1429
   *                                     </td>
1430
   *                                     <td>
1431
   *                                     Search for filename in the include directory.
1432
   *                                     See include_path for more
1433
   *                                     information.
1434
   *                                     </td>
1435
   *                                     </tr>
1436
   *                                     <tr valign="top">
1437
   *                                     <td>
1438
   *                                     FILE_TEXT
1439
   *                                     </td>
1440
   *                                     <td>
1441
   *                                     As of PHP 6, the default encoding of the read
1442
   *                                     data is UTF-8. You can specify a different encoding by creating a
1443
   *                                     custom context or by changing the default using
1444
   *                                     stream_default_encoding. This flag cannot be
1445
   *                                     used with FILE_BINARY.
1446
   *                                     </td>
1447
   *                                     </tr>
1448
   *                                     <tr valign="top">
1449
   *                                     <td>
1450
   *                                     FILE_BINARY
1451
   *                                     </td>
1452
   *                                     <td>
1453
   *                                     With this flag, the file is read in binary mode. This is the default
1454
   *                                     setting and cannot be used with FILE_TEXT.
1455
   *                                     </td>
1456
   *                                     </tr>
1457
   *                                     </table>
1458
   *                                     </p>
1459
   * @param resource|null $context       [optional] <p>
1460
   *                                     A valid context resource created with
1461
   *                                     stream_context_create. If you don't need to use a
1462
   *                                     custom context, you can skip this parameter by &null;.
1463
   *                                     </p>
1464
   * @param int|null $offset             [optional] <p>
1465
   *                                     The offset where the reading starts.
1466
   *                                     </p>
1467
   * @param int|null $maxLength          [optional] <p>
1468
   *                                     Maximum length of data read. The default is to read until end
1469
   *                                     of file is reached.
1470
   *                                     </p>
1471
   * @param int      $timeout            <p>The time in seconds for the timeout.</p>
1472
   *
1473
   * @param boolean  $convertToUtf8      <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1474
   *                                     or pdf, because they used non default utf-8 chars</p>
1475
   *
1476
   * @return string <p>The function returns the read data or false on failure.</p>
1477
   */
1478 4
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // normalise the scalar arguments
    // NOTE(review): FILTER_SANITIZE_STRING rewrites the file name before it is opened — confirm this is intended
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);
    $timeout = (int)$timeout;

    // no caller-supplied context: create one that carries the HTTP timeout
    if ($timeout && $context === null) {
      $httpOptions = array(
          'timeout' => $timeout,
      );
      $context = stream_context_create(
          array(
              'http' => $httpOptions,
          )
      );
    }

    // every falsy $flags value is passed on as boolean false
    $flags = $flags ? $flags : false;

    // a missing offset means "start at the beginning"
    $offset = ($offset === null) ? 0 : $offset;

    // the length argument is only passed when it is a real integer
    $data = (is_int($maxLength) === true)
        ? file_get_contents($filename, $flags, $context, $offset, $maxLength)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    // encode to UTF-8 (not forced), then clean up the result
    $utf8Data = self::encode('UTF-8', $data, false);

    return self::cleanup($utf8Data);
  }
1521
1522
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with "file_get_contents()" and handed to "UTF8::string_has_bom()".
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function file_has_bom($file_path)
  {
    $contents = file_get_contents($file_path);

    return self::string_has_bom($contents);
  }
1533
1534
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed element by element (recursively);
   * values that are neither array, object nor string are returned unchanged.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to "\Normalizer".</p>
   * @param string $leading_combining  <p>Prefix added when the result starts with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder: keeps the isset($n[0]) check below true for already-normalized input
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // use the normalized string if there is one, otherwise re-encode the input as UTF-8
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1597
1598
  /**
1599
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1600
   *
1601
   * Gets a specific external variable by name and optionally filters it
1602
   *
1603
   * @link  http://php.net/manual/en/function.filter-input.php
1604
   *
1605
   * @param int    $type          <p>
1606
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1607
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1608
   *                              <b>INPUT_ENV</b>.
1609
   *                              </p>
1610
   * @param string $variable_name <p>
1611
   *                              Name of a variable to get.
1612
   *                              </p>
1613
   * @param int    $filter        [optional] <p>
1614
   *                              The ID of the filter to apply. The
1615
   *                              manual page lists the available filters.
1616
   *                              </p>
1617
   * @param mixed  $options       [optional] <p>
1618
   *                              Associative array of options or bitwise disjunction of flags. If filter
1619
   *                              accepts options, flags can be provided in "flags" field of array.
1620
   *                              </p>
1621
   *
1622
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1623
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1624
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1625
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1626
   * @since 5.2.0
1627
   */
1628 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually passed a 4th argument
    $value = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($value);
  }
1638
1639
  /**
1640
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1641
   *
1642
   * Gets external variables and optionally filters them
1643
   *
1644
   * @link  http://php.net/manual/en/function.filter-input-array.php
1645
   *
1646
   * @param int   $type       <p>
1647
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1648
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1649
   *                          <b>INPUT_ENV</b>.
1650
   *                          </p>
1651
   * @param mixed $definition [optional] <p>
1652
   *                          An array defining the arguments. A valid key is a string
1653
   *                          containing a variable name and a valid value is either a filter type, or an array
1654
   *                          optionally specifying the filter, flags and options. If the value is an
1655
   *                          array, valid keys are filter which specifies the
1656
   *                          filter type,
1657
   *                          flags which specifies any flags that apply to the
1658
   *                          filter, and options which specifies any options that
1659
   *                          apply to the filter. See the example below for a better understanding.
1660
   *                          </p>
1661
   *                          <p>
1662
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1663
   *                          input array are filtered by this filter.
1664
   *                          </p>
1665
   * @param bool  $add_empty  [optional] <p>
1666
   *                          Add missing keys as <b>NULL</b> to the return value.
1667
   *                          </p>
1668
   *
1669
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1670
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1671
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1672
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1673
   * fails.
1674
   * @since 5.2.0
1675
   */
1676 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with a single argument the native defaults apply; otherwise both extra arguments are forwarded
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($values);
  }
1686
1687
  /**
1688
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1689
   *
1690
   * Filters a variable with a specified filter
1691
   *
1692
   * @link  http://php.net/manual/en/function.filter-var.php
1693
   *
1694
   * @param mixed $variable <p>
1695
   *                        Value to filter.
1696
   *                        </p>
1697
   * @param int   $filter   [optional] <p>
1698
   *                        The ID of the filter to apply. The
1699
   *                        manual page lists the available filters.
1700
   *                        </p>
1701
   * @param mixed $options  [optional] <p>
1702
   *                        Associative array of options or bitwise disjunction of flags. If filter
1703
   *                        accepts options, flags can be provided in "flags" field of array. For
1704
   *                        the "callback" filter, callable type should be passed. The
1705
   *                        callback must accept one argument, the value to be filtered, and return
1706
   *                        the value after filtering/sanitizing it.
1707
   *                        </p>
1708
   *                        <p>
1709
   *                        <code>
1710
   *                        // for filters that accept options, use this format
1711
   *                        $options = array(
1712
   *                        'options' => array(
1713
   *                        'default' => 3, // value to return if the filter fails
1714
   *                        // other options here
1715
   *                        'min_range' => 0
1716
   *                        ),
1717
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1718
   *                        );
1719
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1720
   *                        // for filter that only accept flags, you can pass them directly
1721
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1722
   *                        // for filter that only accept flags, you can also pass as an array
1723
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1724
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1725
   *                        // callback validate filter
1726
   *                        function foo($value)
1727
   *                        {
1728
   *                        // Expected format: Surname, GivenNames
1729
   *                        if (strpos($value, ", ") === false) return false;
1730
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1731
   *                        $empty = (empty($surname) || empty($givennames));
1732
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1733
   *                        if ($empty || $notstrings) {
1734
   *                        return false;
1735
   *                        } else {
1736
   *                        return $value;
1737
   *                        }
1738
   *                        }
1739
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1740
   *                        </code>
1741
   *                        </p>
1742
   *
1743
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1744
   * @since 5.2.0
1745
   */
1746 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually passed a 3rd argument
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    return self::filter($filtered);
  }
1756
1757
  /**
1758
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1759
   *
1760
   * Gets multiple variables and optionally filters them
1761
   *
1762
   * @link  http://php.net/manual/en/function.filter-var-array.php
1763
   *
1764
   * @param array $data       <p>
1765
   *                          An array with string keys containing the data to filter.
1766
   *                          </p>
1767
   * @param mixed $definition [optional] <p>
1768
   *                          An array defining the arguments. A valid key is a string
1769
   *                          containing a variable name and a valid value is either a
1770
   *                          filter type, or an
1771
   *                          array optionally specifying the filter, flags and options.
1772
   *                          If the value is an array, valid keys are filter
1773
   *                          which specifies the filter type,
1774
   *                          flags which specifies any flags that apply to the
1775
   *                          filter, and options which specifies any options that
1776
   *                          apply to the filter. See the example below for a better understanding.
1777
   *                          </p>
1778
   *                          <p>
1779
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1780
   *                          input array are filtered by this filter.
1781
   *                          </p>
1782
   * @param bool  $add_empty  [optional] <p>
1783
   *                          Add missing keys as <b>NULL</b> to the return value.
1784
   *                          </p>
1785
   *
1786
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1787
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1788
   * the variable is not set.
1789
   * @since 5.2.0
1790
   */
1791 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a
    // definition, so its own defaults are used; otherwise both the
    // definition and the "$add_empty" switch are forwarded.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // post-process the native result with self::filter()
    return self::filter($filtered);
  }
1801
1802
  /**
1803
   * Check if the number of unicode characters is not more than the specified integer.
1804
   *
1805
   * @param string $str      The original string to be checked.
1806
   * @param int    $box_size The size in number of chars to be checked against string.
1807
   *
1808
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1809
   */
1810 1
  public static function fits_inside($str, $box_size)
  {
    // length as reported by self::strlen()
    $charCount = self::strlen($str);

    return $charCount <= $box_size;
  }
1814
1815
  /**
1816
   * Try to fix simple broken UTF-8 strings.
1817
   *
1818
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1819
   *
1820
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1821
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1822
   * See: http://en.wikipedia.org/wiki/Windows-1252
1823
   *
1824
   * @param string $str <p>The input string</p>
1825
   *
1826
   * @return string
1827
   */
1828 26 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    // an empty string needs no repair
    if ($str === '') {
      return '';
    }

    // The search / replace lists are built from self::$BROKEN_UTF8_FIX on the
    // first call and reused on every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1847
1848
  /**
1849
   * Fix a double (or multiple) encoded UTF8 string.
1850
   *
1851
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1852
   *
1853
   * @return string|string[] <p>Will return the fixed input-"array" or
1854
   *                         the fixed input-"string".</p>
1855
   */
1856 1
  public static function fix_utf8($str)
  {
    if (is_array($str) === true) {

      // repair every element in place (recursively), keeping the keys
      foreach (array_keys($str) as $key) {
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    // Decode and re-encode again and again until a pass no longer changes
    // the value; an empty string is returned without a single pass.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $str = self::to_utf8(
          self::utf8_decode($previous)
      );
    }

    return $str;
  }
1880
1881
  /**
1882
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1883
   *
1884
   * @param string $char
1885
   *
1886
   * @return string <p>'RTL' or 'LTR'</p>
1887
   */
1888 1
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes taken from the "IntlChar"-Class; any other code
      // falls through to the code point tables below
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $c = self::chr_to_decimal($char);

    // everything outside of U+05BE .. U+10B7F is treated as left-to-right
    if (0x5be > $c || 0x10b7f < $c) {
      return 'LTR';
    }

    // single code points that are right-to-left (compared strictly)
    $rtlSingles = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    if (in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    // inclusive [first, last] code point ranges that are right-to-left
    $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2002
2003
  /**
2004
   * Get data from "/data/*.php".
2005
   *
2006
   * @param string $file
2007
   *
2008
   * @return bool|string|array|int <p>Will return false on error.</p>
2009
   */
2010 4
  private static function getData($file)
  {
    // data files live next to this class in "data/<name>.php"
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    // the data file hands its content back via its own "return" statement
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2020
2021
  /**
2022
   * Check for php-support.
2023
   *
2024
   * @param string|null $key
2025
   *
2026
   * @return mixed <p>Return the full support-"array", if $key === null<br />
2027
   *               return bool-value, if $key is used and available<br />
2028
   *               otherwise return null</p>
2029
   */
2030 7
  public static function getSupportInfo($key = null)
  {
    // make sure the support-"array" was populated
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key requested: hand out the full support-"array"
    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown keys yield null
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2046
2047
  /**
2048
   * alias for "UTF8::string_has_bom()"
2049
   *
2050
   * @see UTF8::string_has_bom()
2051
   *
2052
   * @param string $str
2053
   *
2054
   * @return bool
2055
   *
2056
   * @deprecated
2057
   */
2058
  public static function hasBom($str)
  {
    // deprecated alias: the argument is forwarded unchanged
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2062
2063
  /**
2064
   * Converts a hexadecimal-value into an UTF-8 character.
2065
   *
2066
   * @param string $hexdec <p>The hexadecimal value.</p>
2067
   *
2068
   * @return string|false <p>One single UTF-8 character.</p>
2069
   */
2070 2
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> decimal code point
    $codePoint = hexdec($hexdec);

    // decimal code point -> character
    return self::decimal_to_chr($codePoint);
  }
2074
2075
  /**
2076
   * Converts hexadecimal U+xxxx code point representation to integer.
2077
   *
2078
   * INFO: opposite to UTF8::int_to_hex()
2079
   *
2080
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2081
   *
2082
   * @return int|false <p>The code point, or false on failure.</p>
2083
   */
2084 1
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    // an empty input can never be a code point
    if (!isset($hexDec[0])) {
      return false;
    }

    // Accepted form: an optional "\u" or "U+" prefix followed by four to six
    // hexadecimal digits (case-insensitive).
    //
    // Only real hex digits (0-9, a-f) are allowed. The character class used
    // to be "a-z", so inputs like "zzzz" or "00ag" matched and intval()
    // silently turned them into 0 or a truncated number instead of the
    // documented "false on failure".
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2098
2099
  /**
2100
   * alias for "UTF8::html_entity_decode()"
2101
   *
2102
   * @see UTF8::html_entity_decode()
2103
   *
2104
   * @param string $str
2105
   * @param int    $flags
2106
   * @param string $encoding
2107
   *
2108
   * @return string
2109
   */
2110 1
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // alias: all arguments are forwarded unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2114
2115
  /**
2116
   * Converts a UTF-8 string to a series of HTML numbered entities.
2117
   *
2118
   * INFO: opposite to UTF8::html_decode()
2119
   *
2120
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2121
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2122
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2123
   *
2124
   * @return string <p>HTML numbered entities.</p>
2125
   */
2126 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets converted; starting at 0x80 keeps the
      // 7-bit ASCII characters as they are
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap is made of groups of exactly four integers:
      // (start code, end code, offset, mask).
      // The map used to carry a stray fifth element; older PHP versions
      // ignored it, but PHP 8 rejects maps whose size is not a multiple of
      // four with a ValueError.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without "mb_encode_numericentity()": encode the string
    // character by character
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2164
2165
  /**
2166
   * UTF-8 version of html_entity_decode()
2167
   *
2168
   * The reason we are not using html_entity_decode() by itself is because
2169
   * while it is not technically correct to leave out the semicolon
2170
   * at the end of an entity most browsers will still interpret the entity
2171
   * correctly. html_entity_decode() does not convert entities without
2172
   * semicolons, so we are left with our own little solution here. Bummer.
2173
   *
2174
   * Convert all HTML entities to their applicable characters
2175
   *
2176
   * INFO: opposite to UTF8::html_encode()
2177
   *
2178
   * @link http://php.net/manual/en/function.html-entity-decode.php
2179
   *
2180
   * @param string $str      <p>
2181
   *                         The input string.
2182
   *                         </p>
2183
   * @param int    $flags    [optional] <p>
2184
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2185
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2186
   *                         <table>
2187
   *                         Available <i>flags</i> constants
2188
   *                         <tr valign="top">
2189
   *                         <td>Constant Name</td>
2190
   *                         <td>Description</td>
2191
   *                         </tr>
2192
   *                         <tr valign="top">
2193
   *                         <td><b>ENT_COMPAT</b></td>
2194
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2195
   *                         </tr>
2196
   *                         <tr valign="top">
2197
   *                         <td><b>ENT_QUOTES</b></td>
2198
   *                         <td>Will convert both double and single quotes.</td>
2199
   *                         </tr>
2200
   *                         <tr valign="top">
2201
   *                         <td><b>ENT_NOQUOTES</b></td>
2202
   *                         <td>Will leave both double and single quotes unconverted.</td>
2203
   *                         </tr>
2204
   *                         <tr valign="top">
2205
   *                         <td><b>ENT_HTML401</b></td>
2206
   *                         <td>
2207
   *                         Handle code as HTML 4.01.
2208
   *                         </td>
2209
   *                         </tr>
2210
   *                         <tr valign="top">
2211
   *                         <td><b>ENT_XML1</b></td>
2212
   *                         <td>
2213
   *                         Handle code as XML 1.
2214
   *                         </td>
2215
   *                         </tr>
2216
   *                         <tr valign="top">
2217
   *                         <td><b>ENT_XHTML</b></td>
2218
   *                         <td>
2219
   *                         Handle code as XHTML.
2220
   *                         </td>
2221
   *                         </tr>
2222
   *                         <tr valign="top">
2223
   *                         <td><b>ENT_HTML5</b></td>
2224
   *                         <td>
2225
   *                         Handle code as HTML 5.
2226
   *                         </td>
2227
   *                         </tr>
2228
   *                         </table>
2229
   *                         </p>
2230
   * @param string $encoding [optional] <p>Encoding to use.</p>
2231
   *
2232
   * @return string <p>The decoded string.</p>
2233
   */
2234 16
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // strings with fewer than four bytes are returned untouched (examples: &; || &x;)
    if (!isset($str[3])) {
      return $str;
    }

    // without any ampersand there is nothing to decode
    if (strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity marker nor a terminating semicolon
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // default flags: ENT_HTML5 is only referenced on PHP >= 5.4
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // repeat until a full pass leaves the string unchanged, so that
    // multiple-encoded entities get resolved as well
    do {
      $before = $str;

      // decimal entities with a trailing semicolon
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($entity) use ($encoding) {
            $decoded = \mb_convert_encoding($entity[0], $encoding, 'HTML-ENTITIES');

            // quote characters stay encoded in this step; the native call
            // below treats them according to $flags
            if ($decoded === '"' || $decoded === "'") {
              return $entity[0];
            }

            return $decoded;
          },
          $str
      );

      // append the missing semicolon to numeric entities that lack one
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      // decode numeric & UTF16 two byte entities
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2299
2300
  /**
2301
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2302
   *
2303
   * @link http://php.net/manual/en/function.htmlentities.php
2304
   *
2305
   * @param string $str           <p>
2306
   *                              The input string.
2307
   *                              </p>
2308
   * @param int    $flags         [optional] <p>
2309
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2310
   *                              invalid code unit sequences and the used document type. The default is
2311
   *                              ENT_COMPAT | ENT_HTML401.
2312
   *                              <table>
2313
   *                              Available <i>flags</i> constants
2314
   *                              <tr valign="top">
2315
   *                              <td>Constant Name</td>
2316
   *                              <td>Description</td>
2317
   *                              </tr>
2318
   *                              <tr valign="top">
2319
   *                              <td><b>ENT_COMPAT</b></td>
2320
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2321
   *                              </tr>
2322
   *                              <tr valign="top">
2323
   *                              <td><b>ENT_QUOTES</b></td>
2324
   *                              <td>Will convert both double and single quotes.</td>
2325
   *                              </tr>
2326
   *                              <tr valign="top">
2327
   *                              <td><b>ENT_NOQUOTES</b></td>
2328
   *                              <td>Will leave both double and single quotes unconverted.</td>
2329
   *                              </tr>
2330
   *                              <tr valign="top">
2331
   *                              <td><b>ENT_IGNORE</b></td>
2332
   *                              <td>
2333
   *                              Silently discard invalid code unit sequences instead of returning
2334
   *                              an empty string. Using this flag is discouraged as it
2335
   *                              may have security implications.
2336
   *                              </td>
2337
   *                              </tr>
2338
   *                              <tr valign="top">
2339
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2340
   *                              <td>
2341
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2342
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2343
   *                              </td>
2344
   *                              </tr>
2345
   *                              <tr valign="top">
2346
   *                              <td><b>ENT_DISALLOWED</b></td>
2347
   *                              <td>
2348
   *                              Replace invalid code points for the given document type with a
2349
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2350
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2351
   *                              instance, to ensure the well-formedness of XML documents with
2352
   *                              embedded external content.
2353
   *                              </td>
2354
   *                              </tr>
2355
   *                              <tr valign="top">
2356
   *                              <td><b>ENT_HTML401</b></td>
2357
   *                              <td>
2358
   *                              Handle code as HTML 4.01.
2359
   *                              </td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_XML1</b></td>
2363
   *                              <td>
2364
   *                              Handle code as XML 1.
2365
   *                              </td>
2366
   *                              </tr>
2367
   *                              <tr valign="top">
2368
   *                              <td><b>ENT_XHTML</b></td>
2369
   *                              <td>
2370
   *                              Handle code as XHTML.
2371
   *                              </td>
2372
   *                              </tr>
2373
   *                              <tr valign="top">
2374
   *                              <td><b>ENT_HTML5</b></td>
2375
   *                              <td>
2376
   *                              Handle code as HTML 5.
2377
   *                              </td>
2378
   *                              </tr>
2379
   *                              </table>
2380
   *                              </p>
2381
   * @param string $encoding      [optional] <p>
2382
   *                              Like <b>htmlspecialchars</b>,
2383
   *                              <b>htmlentities</b> takes an optional third argument
2384
   *                              <i>encoding</i> which defines encoding used in
2385
   *                              conversion.
2386
   *                              Although this argument is technically optional, you are highly
2387
   *                              encouraged to specify the correct value for your code.
2388
   *                              </p>
2389
   * @param bool   $double_encode [optional] <p>
2390
   *                              When <i>double_encode</i> is turned off PHP will not
2391
   *                              encode existing html entities. The default is to convert everything.
2392
   *                              </p>
2393
   *
2394
   *
2395
   * @return string the encoded string.
2396
   * </p>
2397
   * <p>
2398
   * If the input <i>string</i> contains an invalid code unit
2399
   * sequence within the given <i>encoding</i> an empty string
2400
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2401
   * <b>ENT_SUBSTITUTE</b> flags are set.
2402
   */
2403 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $str = htmlentities($str, $flags, $encoding, $double_encode);

    // the extra pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect every distinct character that takes three or more bytes and
    // map it to its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($str, $position);

      // each character is encoded only once
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2431
2432
  /**
2433
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2434
   *
2435
   * INFO: Take a look at "UTF8::htmlentities()"
2436
   *
2437
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2438
   *
2439
   * @param string $str           <p>
2440
   *                              The string being converted.
2441
   *                              </p>
2442
   * @param int    $flags         [optional] <p>
2443
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2444
   *                              invalid code unit sequences and the used document type. The default is
2445
   *                              ENT_COMPAT | ENT_HTML401.
2446
   *                              <table>
2447
   *                              Available <i>flags</i> constants
2448
   *                              <tr valign="top">
2449
   *                              <td>Constant Name</td>
2450
   *                              <td>Description</td>
2451
   *                              </tr>
2452
   *                              <tr valign="top">
2453
   *                              <td><b>ENT_COMPAT</b></td>
2454
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2455
   *                              </tr>
2456
   *                              <tr valign="top">
2457
   *                              <td><b>ENT_QUOTES</b></td>
2458
   *                              <td>Will convert both double and single quotes.</td>
2459
   *                              </tr>
2460
   *                              <tr valign="top">
2461
   *                              <td><b>ENT_NOQUOTES</b></td>
2462
   *                              <td>Will leave both double and single quotes unconverted.</td>
2463
   *                              </tr>
2464
   *                              <tr valign="top">
2465
   *                              <td><b>ENT_IGNORE</b></td>
2466
   *                              <td>
2467
   *                              Silently discard invalid code unit sequences instead of returning
2468
   *                              an empty string. Using this flag is discouraged as it
2469
   *                              may have security implications.
2470
   *                              </td>
2471
   *                              </tr>
2472
   *                              <tr valign="top">
2473
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2474
   *                              <td>
2475
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2476
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2477
   *                              </td>
2478
   *                              </tr>
2479
   *                              <tr valign="top">
2480
   *                              <td><b>ENT_DISALLOWED</b></td>
2481
   *                              <td>
2482
   *                              Replace invalid code points for the given document type with a
2483
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2484
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2485
   *                              instance, to ensure the well-formedness of XML documents with
2486
   *                              embedded external content.
2487
   *                              </td>
2488
   *                              </tr>
2489
   *                              <tr valign="top">
2490
   *                              <td><b>ENT_HTML401</b></td>
2491
   *                              <td>
2492
   *                              Handle code as HTML 4.01.
2493
   *                              </td>
2494
   *                              </tr>
2495
   *                              <tr valign="top">
2496
   *                              <td><b>ENT_XML1</b></td>
2497
   *                              <td>
2498
   *                              Handle code as XML 1.
2499
   *                              </td>
2500
   *                              </tr>
2501
   *                              <tr valign="top">
2502
   *                              <td><b>ENT_XHTML</b></td>
2503
   *                              <td>
2504
   *                              Handle code as XHTML.
2505
   *                              </td>
2506
   *                              </tr>
2507
   *                              <tr valign="top">
2508
   *                              <td><b>ENT_HTML5</b></td>
2509
   *                              <td>
2510
   *                              Handle code as HTML 5.
2511
   *                              </td>
2512
   *                              </tr>
2513
   *                              </table>
2514
   *                              </p>
2515
   * @param string $encoding      [optional] <p>
2516
   *                              Defines encoding used in conversion.
2517
   *                              </p>
2518
   *                              <p>
2519
   *                              For the purposes of this function, the encodings
2520
   *                              ISO-8859-1, ISO-8859-15,
2521
   *                              UTF-8, cp866,
2522
   *                              cp1251, cp1252, and
2523
   *                              KOI8-R are effectively equivalent, provided the
2524
   *                              <i>string</i> itself is valid for the encoding, as
2525
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2526
   *                              the same positions in all of these encodings.
2527
   *                              </p>
2528
   * @param bool   $double_encode [optional] <p>
2529
   *                              When <i>double_encode</i> is turned off PHP will not
2530
   *                              encode existing html entities, the default is to convert everything.
2531
   *                              </p>
2532
   *
2533
   * @return string The converted string.
2534
   * </p>
2535
   * <p>
2536
   * If the input <i>string</i> contains an invalid code unit
2537
   * sequence within the given <i>encoding</i> an empty string
2538
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2539
   * <b>ENT_SUBSTITUTE</b> flags are set.
2540
   */
2541 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The exact spelling "UTF-8" is used as-is; any other value is first run
    // through normalize_encoding() with "UTF-8" as its second argument.
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    // Delegate the actual escaping to PHP's native implementation.
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2549
2550
  /**
2551
   * Checks whether iconv is available on the server.
2552
   *
2553
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2554
   */
2555 1
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a boolean.
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only configure iconv when the extension really exists: calling
    // "iconv_set_encoding()" without it would be a fatal "undefined function"
    // error instead of a clean "false" result.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2570
2571
  /**
2572
   * alias for "UTF8::decimal_to_chr()"
2573
   *
2574
   * @see UTF8::decimal_to_chr()
2575
   *
2576
   * @param mixed $int
2577
   *
2578
   * @return string
2579
   */
2580 2
  public static function int_to_chr($int)
  {
    // Pure pass-through: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2584
2585
  /**
2586
   * Converts Integer to hexadecimal U+xxxx code point representation.
2587
   *
2588
   * INFO: opposite to UTF8::hex_to_int()
2589
   *
2590
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2591
   * @param string $pfix [optional]
2592
   *
2593
   * @return string <p>The code point, or empty string on failure.</p>
2594
   */
2595 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only genuine integers are converted; every other input yields ''.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits, e.g. 65 => "0041".
    // Longer values are kept unchanged.
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2607
2608
  /**
2609
   * Checks whether intl-char is available on the server.
2610
   *
2611
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2612
   */
2613 1
  public static function intlChar_loaded()
  {
    // Bootup::is_php('7.0') must report true before the class is looked up.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2621
2622
  /**
2623
   * Checks whether intl is available on the server.
2624
   *
2625
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2626
   */
2627 4
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean; compare strictly and return it.
    return extension_loaded('intl') === true;
  }
2631
2632
  /**
2633
   * alias for "UTF8::is_ascii()"
2634
   *
2635
   * @see UTF8::is_ascii()
2636
   *
2637
   * @param string $str
2638
   *
2639
   * @return boolean
2640
   *
2641
   * @deprecated
2642
   */
2643
  public static function isAscii($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2647
2648
  /**
2649
   * alias for "UTF8::is_base64()"
2650
   *
2651
   * @see UTF8::is_base64()
2652
   *
2653
   * @param string $str
2654
   *
2655
   * @return bool
2656
   *
2657
   * @deprecated
2658
   */
2659
  public static function isBase64($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2663
2664
  /**
2665
   * alias for "UTF8::is_binary()"
2666
   *
2667
   * @see UTF8::is_binary()
2668
   *
2669
   * @param string $str
2670
   *
2671
   * @return bool
2672
   *
2673
   * @deprecated
2674
   */
2675
  public static function isBinary($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2679
2680
  /**
2681
   * alias for "UTF8::is_bom()"
2682
   *
2683
   * @see UTF8::is_bom()
2684
   *
2685
   * @param string $utf8_chr
2686
   *
2687
   * @return boolean
2688
   *
2689
   * @deprecated
2690
   */
2691
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias; forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2695
2696
  /**
2697
   * alias for "UTF8::is_html()"
2698
   *
2699
   * @see UTF8::is_html()
2700
   *
2701
   * @param string $str
2702
   *
2703
   * @return boolean
2704
   *
2705
   * @deprecated
2706
   */
2707
  public static function isHtml($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2711
2712
  /**
2713
   * alias for "UTF8::is_json()"
2714
   *
2715
   * @see UTF8::is_json()
2716
   *
2717
   * @param string $str
2718
   *
2719
   * @return bool
2720
   *
2721
   * @deprecated
2722
   */
2723
  public static function isJson($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2727
2728
  /**
2729
   * alias for "UTF8::is_utf16()"
2730
   *
2731
   * @see UTF8::is_utf16()
2732
   *
2733
   * @param string $str
2734
   *
2735
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2736
   *
2737
   * @deprecated
2738
   */
2739
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2743
2744
  /**
2745
   * alias for "UTF8::is_utf32()"
2746
   *
2747
   * @see UTF8::is_utf32()
2748
   *
2749
   * @param string $str
2750
   *
2751
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2752
   *
2753
   * @deprecated
2754
   */
2755
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2759
2760
  /**
2761
   * alias for "UTF8::is_utf8()"
2762
   *
2763
   * @see UTF8::is_utf8()
2764
   *
2765
   * @param string $str
2766
   * @param bool   $strict
2767
   *
2768
   * @return bool
2769
   *
2770
   * @deprecated
2771
   */
2772
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias; forwards both arguments to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2776
2777
  /**
2778
   * Checks if a string is 7 bit ASCII.
2779
   *
2780
   * @param string $str <p>The string to check.</p>
2781
   *
2782
   * @return bool <p>
2783
   *              <strong>true</strong> if it is ASCII<br />
2784
   *              <strong>false</strong> otherwise
2785
   *              </p>
2786
   */
2787 42
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // An empty string contains no non-ASCII byte.
    if (!isset($str[0])) {
      return true;
    }

    // Accept TAB (\x09), LF (\x0A), CR (\x0D) and the printable range
    // \x20-\x7E; any other byte makes the string non-ASCII.
    //
    // The previous pattern additionally listed \x10 and \x13 (DLE / DC3).
    // Those were decimal 10 / 13 written as hex by mistake: LF and CR are
    // already covered by \x0A and \x0D, and no other control byte was allowed.
    //
    // preg_match() returns false on error; as before, that is reported as true.
    return preg_match('/[^\x09\x0A\x0D\x20-\x7E]/', $str) !== 1;
  }
2797
2798
  /**
2799
   * Returns true if the string is base64 encoded, false otherwise.
2800
   *
2801
   * @param string $str <p>The input string.</p>
2802
   *
2803
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2804
   */
2805 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never considered base64 encoded.
    if (!isset($str[0])) {
      return false;
    }

    // Strict mode: base64_decode() returns false when the input contains
    // characters outside the base64 alphabet.
    $decoded = base64_decode($str, true);

    // Compare explicitly against false / '' instead of relying on
    // truthiness: a decoded payload of "0" (input "MA==") is falsy in PHP
    // and was wrongly rejected before.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // Round-trip check: only canonical encodings are accepted.
    return base64_encode($decoded) === $str;
  }
2820
2821
  /**
2822
   * Check if the input is binary (this is a heuristic and looks like a hack).
2823
   *
2824
   * @param mixed $input
2825
   *
2826
   * @return bool
2827
   */
2828 18
  public static function is_binary($input)
  {
    $data = (string)$input;

    // Nothing to inspect in an empty string.
    if (!isset($data[0])) {
      return false;
    }

    // A string made up solely of "0" and "1" characters counts as binary.
    if (preg_match('~^[01]+$~', $data)) {
      return true;
    }

    // Otherwise a single NUL byte is enough to classify the input as binary.
    // (A "more than 30% NUL bytes" rule would be implied by this check.)
    return substr_count($data, "\x00") > 0;
  }
2851
2852
  /**
2853
   * Check if the file is binary.
2854
   *
2855
   * @param string $file
2856
   *
2857
   * @return boolean
2858
   */
2859
  public static function is_binary_file($file)
  {
    // Default: an unreadable file is inspected as an empty string.
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // fopen() reports failure by returning false, not by throwing, so the
      // handle has to be checked before it is passed to fread().
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $chunk = fread($fp, 512);
        if ($chunk !== false) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached only if something throws, e.g. an error handler that turns
      // warnings into exceptions.
      $block = '';
    }

    // Release the handle on every path (PHP 5.3 has no "finally").
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2871
2872
  /**
2873
   * Checks if the given string is equal to any "Byte Order Mark".
2874
   *
2875
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2876
   *
2877
   * @param string $str <p>The input string.</p>
2878
   *
2879
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2880
   */
2881 1
  public static function is_bom($str)
  {
    // The known byte order marks are the keys of self::$BOM; the values are
    // not needed here. The strict flag keeps the comparison type-safe (===).
    $knownBoms = array_keys(self::$BOM);

    return in_array($str, $knownBoms, true);
  }
2891
2892
  /**
2893
   * Check if the string contains any html-tags <lall>.
2894
   *
2895
   * @param string $str <p>The input string.</p>
2896
   *
2897
   * @return boolean
2898
   */
2899 1
  public static function is_html($str)
  {
    $text = (string)$str;

    // An empty string cannot contain a tag.
    if (!isset($text[0])) {
      return false;
    }

    // Matches an opening, closing or self-closing tag, optionally carrying
    // attributes with quoted or unquoted values.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // preg_match() returns 1 on a hit, 0 on a miss and false on error;
    // only a hit counts as "contains html".
    return preg_match($tagPattern, $text) === 1;
  }
2918
2919
  /**
2920
   * Try to check if "$str" is an json-string.
2921
   *
2922
   * @param string $str <p>The input string.</p>
2923
   *
2924
   * @return bool
2925
   */
2926 1
  public static function is_json($str)
  {
    $candidate = (string)$str;

    // The empty string is not treated as JSON.
    if (!isset($candidate[0])) {
      return false;
    }

    $decoded = self::json_decode($candidate);

    // Only objects and arrays qualify; decoded scalars and null do not.
    $isContainer = is_object($decoded) === true || is_array($decoded) === true;

    // The decoder must also not have reported an error for this run.
    return $isContainer && json_last_error() === JSON_ERROR_NONE;
  }
2950
2951
  /**
2952
   * Check if the string is UTF-16.
2953
   *
2954
   * @param string $str <p>The input string.</p>
2955
   *
2956
   * @return int|false <p>
2957
   *                   <strong>false</strong> if it's not UTF-16,<br />
2958
   *                   <strong>1</strong> for UTF-16LE,<br />
2959
   *                   <strong>2</strong> for UTF-16BE.
2960
   *                   </p>
2961
   */
2962 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One score per byte order, evaluated in the order LE, BE.
    $score = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      // Interpret the input as this byte order and convert it to UTF-8.
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // Round trip (UTF-8 -> UTF-16 -> UTF-8) must reproduce the same text.
      $backToUtf16 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf16, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Every key of count_chars($roundTrip) that is also found (strictly)
      // among the values of count_chars($str) scores one point.
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means "not recognisably UTF-16".
    if ($score['UTF-16BE'] === $score['UTF-16LE']) {
      return false;
    }

    // 1 => little endian wins, 2 => big endian wins.
    return ($score['UTF-16LE'] > $score['UTF-16BE']) ? 1 : 2;
  }
3010
3011
  /**
3012
   * Check if the string is UTF-32.
3013
   *
3014
   * @param string $str
3015
   *
3016
   * @return int|false <p>
3017
   *                   <strong>false</strong> if it's not UTF-32,<br />
3018
   *                   <strong>1</strong> for UTF-32LE,<br />
3019
   *                   <strong>2</strong> for UTF-32BE.
3020
   *                   </p>
3021
   */
3022 3 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One score per byte order, evaluated in the order LE, BE.
    $score = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      // Interpret the input as this byte order and convert it to UTF-8.
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // Round trip (UTF-8 -> UTF-32 -> UTF-8) must reproduce the same text.
      $backToUtf32 = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backToUtf32, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Every key of count_chars($roundTrip) that is also found (strictly)
      // among the values of count_chars($str) scores one point.
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) means "not recognisably UTF-32".
    if ($score['UTF-32BE'] === $score['UTF-32LE']) {
      return false;
    }

    // 1 => little endian wins, 2 => big endian wins.
    return ($score['UTF-32LE'] > $score['UTF-32BE']) ? 1 : 2;
  }
3070
3071
  /**
3072
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3073
   *
3074
   * @see    http://hsivonen.iki.fi/php-utf8/
3075
   *
3076
   * @param string $str    <p>The string to be checked.</p>
3077
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3078
   *
3079
   * @return bool
3080
   */
3081 61
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is trivially valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: reject anything the UTF-16 / UTF-32 detectors recognise.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this /u shortcut runs when pcre_utf8_support() does NOT
    // return true, which looks inverted - confirm against that helper.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte length is needed; with mbstring function overloading strlen()
    // would count characters, so the 8BIT variant is used instead.
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet
      // sequence.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence: a lead or ASCII byte arrived
        // where a continuation byte was required.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence. mUcs4 now contains the final
        // Unicode code point; check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // initialize UTF8 cache
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // The input is only valid if it did not stop in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3" at the end of the string).
    return $mState === 0;
  }
3222
3223
  /**
3224
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3225
   * Decodes a JSON string
3226
   *
3227
   * @link http://php.net/manual/en/function.json-decode.php
3228
   *
3229
   * @param string $json    <p>
3230
   *                        The <i>json</i> string being decoded.
3231
   *                        </p>
3232
   *                        <p>
3233
   *                        This function only works with UTF-8 encoded strings.
3234
   *                        </p>
3235
   *                        <p>PHP implements a superset of
3236
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3237
   *                        only supports these values when they are nested inside an array or an object.
3238
   *                        </p>
3239
   * @param bool   $assoc   [optional] <p>
3240
   *                        When <b>TRUE</b>, returned objects will be converted into
3241
   *                        associative arrays.
3242
   *                        </p>
3243
   * @param int    $depth   [optional] <p>
3244
   *                        User specified recursion depth.
3245
   *                        </p>
3246
   * @param int    $options [optional] <p>
3247
   *                        Bitmask of JSON decode options. Currently only
3248
   *                        <b>JSON_BIGINT_AS_STRING</b>
3249
   *                        is supported (default is to cast large integers as floats)
3250
   *                        </p>
3251
   *
3252
   * @return mixed the value encoded in <i>json</i> in appropriate
3253
   * PHP type. Values true, false and
3254
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3255
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3256
   * <i>json</i> cannot be decoded or if the encoded
3257
   * data is deeper than the recursion limit.
3258
   */
3259 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // Run the input through self::filter() and force it to a string.
    $input = (string)self::filter($json);

    // The "$options" argument is only passed on when Bootup reports PHP >= 5.4.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($input, $assoc, $depth);
    }

    return json_decode($input, $assoc, $depth, $options);
  }
3271
3272
  /**
3273
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3274
   * Returns the JSON representation of a value.
3275
   *
3276
   * @link http://php.net/manual/en/function.json-encode.php
3277
   *
3278
   * @param mixed $value   <p>
3279
   *                       The <i>value</i> being encoded. Can be any type except
3280
   *                       a resource.
3281
   *                       </p>
3282
   *                       <p>
3283
   *                       All string data must be UTF-8 encoded.
3284
   *                       </p>
3285
   *                       <p>PHP implements a superset of
3286
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3287
   *                       only supports these values when they are nested inside an array or an object.
3288
   *                       </p>
3289
   * @param int   $options [optional] <p>
3290
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3291
   *                       <b>JSON_HEX_TAG</b>,
3292
   *                       <b>JSON_HEX_AMP</b>,
3293
   *                       <b>JSON_HEX_APOS</b>,
3294
   *                       <b>JSON_NUMERIC_CHECK</b>,
3295
   *                       <b>JSON_PRETTY_PRINT</b>,
3296
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3297
   *                       <b>JSON_FORCE_OBJECT</b>,
3298
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3299
   *                       constants is described on
3300
   *                       the JSON constants page.
3301
   *                       </p>
3302
   * @param int   $depth   [optional] <p>
3303
   *                       Set the maximum depth. Must be greater than zero.
3304
   *                       </p>
3305
   *
3306
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3307
   */
3308 2 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // Run the value through self::filter() before encoding.
    $filtered = self::filter($value);

    // The "$depth" argument is only passed on when Bootup reports PHP >= 5.5.
    if (Bootup::is_php('5.5') === true) {
      return json_encode($filtered, $options, $depth);
    }

    return json_encode($filtered, $options);
  }
3320
3321
  /**
3322
   * Makes string's first char lowercase.
3323
   *
3324
   * @param string $str <p>The input string</p>
3325
   * @param string  $encoding  [optional] <p>Set the charset.</p>
3326
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3327
   *
3328
   * @return string <p>The resulting string</p>
3329
   */
3330 7 View Code Duplication
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Everything after the first character; substr() may answer false.
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    // The first character, forced to a string before it is lowercased.
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($head, $encoding, $cleanUtf8);

    // A false tail is treated as "no remaining characters".
    return $head . ($tail === false ? '' : $tail);
  }
3345
3346
  /**
3347
   * alias for "UTF8::lcfirst()"
3348
   *
3349
   * @see UTF8::lcfirst()
3350
   *
3351
   * @param string  $word
3352
   * @param string  $encoding
3353
   * @param boolean $cleanUtf8
3354
   *
3355
   * @return string
3356
   */
3357 1
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Pure alias: every argument is forwarded unchanged to lcfirst().
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3361
3362
  /**
3363
   * Lowercase for all words in the string.
3364
   *
3365
   * @param string   $str        <p>The input string.</p>
3366
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3367
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
3368
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3369
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3370
   *
3371
   * @return string
3372
   */
3373 1 View Code Duplication
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Empty input yields an empty string. "0" is falsy in PHP but is a valid,
    // non-empty input, so it is explicitly exempted from the emptiness check.
    if (!$str && $str !== '0' && $str !== 0) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Skip empty fragments only; a fragment such as "0" must survive,
      // otherwise it would silently disappear from the result.
      if ((string)$word === '') {
        continue;
      }

      // Words listed in $exceptions (strict comparison) are kept untouched.
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The fragments are glued back together without any extra separator.
    return implode('', $newWords);
  }
3411
3412
  /**
3413
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3414
   *
3415
   * @param string $str   <p>The string to be trimmed</p>
3416
   * @param string $chars <p>Optional characters to be stripped</p>
3417
   *
3418
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3419
   */
3420 24 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Without an explicit character list, strip Unicode separators (\pZ) and
    // "other" characters such as controls (\pC) from the start of the string.
    // "0" is falsy in PHP, so it is exempted here: otherwise ltrim('007', '0')
    // would fall back to whitespace trimming instead of stripping the zeros.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    // rxClass() turns the character list into a regex fragment anchored at the start here.
    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3435
3436
  /**
3437
   * Returns the UTF-8 character with the maximum code point in the given data.
3438
   *
3439
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3440
   *
3441
   * @return string <p>The character with the highest code point of all given characters.</p>
3442
   */
3443 1 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is concatenated so all characters are compared together.
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;

    $highestCodePoint = max(self::codepoints($haystack));

    return self::chr($highestCodePoint);
  }
3451
3452
  /**
3453
   * Calculates and returns the maximum number of bytes taken by any
3454
   * UTF-8 encoded character in the given string.
3455
   *
3456
   * @param string $str <p>The original Unicode string.</p>
3457
   *
3458
   * @return int <p>Max byte lengths of the given chars.</p>
3459
   */
3460 1
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // An empty size list means there is no character to measure.
    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3469
3470
  /**
3471
   * Checks whether mbstring is available on the server.
3472
   *
3473
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3474
   */
3475 15
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring')) {
      // Side effect: mbstring's internal encoding is switched to UTF-8.
      \mb_internal_encoding('UTF-8');

      return true;
    }

    return false;
  }
3485
3486
  /**
3487
   * Returns the UTF-8 character with the minimum code point in the given data.
3488
   *
3489
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3490
   *
3491
   * @return string <p>The character with the lowest code point of all given characters.</p>
3492
   */
3493 1 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is concatenated so all characters are compared together.
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;

    $lowestCodePoint = min(self::codepoints($haystack));

    return self::chr($lowestCodePoint);
  }
3501
3502
  /**
3503
   * alias for "UTF8::normalize_encoding()"
3504
   *
3505
   * @see UTF8::normalize_encoding()
3506
   *
3507
   * @param string $encoding
3508
   * @param mixed  $fallback
3509
   *
3510
   * @return string
3511
   *
3512
   * @deprecated
3513
   */
3514
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    // Deprecated camelCase alias: simply delegates to normalize_encoding().
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3518
3519
  /**
3520
   * Normalize the encoding-"name" input.
3521
   *
3522
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3523
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3524
   *
3525
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
3526
   */
3527 76
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    // Nothing usable was given: hand back the caller's fallback as-is.
    if (!$encoding) {
      return $fallback;
    }

    // Fast paths: the canonical UTF-8 name and names already known to the
    // class-level encoding list are returned untouched.
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    // Results are memoized per original (un-normalized) input.
    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    // Lookup table: upper-cased name without punctuation => canonical name.
    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    $normalized = strtoupper($encoding);

    // The lookup key keeps only letters, digits and whitespace of the upper-cased name.
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    // Unknown names fall through as their upper-cased form.
    if (!empty($aliases[$lookupKey])) {
      $normalized = $aliases[$lookupKey];
    }

    return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;
  }
3576
3577
  /**
3578
   * Normalize some MS Word special characters.
3579
   *
3580
   * @param string $str <p>The string to be normalized.</p>
3581
   *
3582
   * @return string
3583
   */
3584 16 View Code Duplication
  public static function normalize_msword($str)
  {
    // Search/replace lists are derived once from self::$UTF8_MSWORD and then reused.
    static $MSWORD_SEARCH = null;
    static $MSWORD_REPLACE = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($MSWORD_SEARCH === null) {
      $MSWORD_SEARCH = array_keys(self::$UTF8_MSWORD);
      $MSWORD_REPLACE = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($MSWORD_SEARCH, $MSWORD_REPLACE, $str);
  }
3602
3603
  /**
3604
   * Normalize the whitespace.
3605
   *
3606
   * @param string $str                     <p>The string to be normalized.</p>
3607
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3608
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3609
   *                                        bidirectional text chars.</p>
3610
   *
3611
   * @return string
3612
   */
3613 37
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key is derived from the same strict check that decides whether
    // 'NO-BREAK SPACE' is removed from the table below. With a plain (int) cast a
    // truthy non-boolean (e.g. 1) shared cache slot 1 with true but skipped the
    // unset(), so such a first call cached the full table and every later call
    // with true replaced non-breaking spaces as well.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $WHITESPACE_CACHE[$cacheKey] = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($WHITESPACE_CACHE[$cacheKey]['NO-BREAK SPACE']);
      }

      // str_replace() only needs the characters, not the descriptive keys.
      $WHITESPACE_CACHE[$cacheKey] = array_values($WHITESPACE_CACHE[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      // Bidirectional control characters are removed, not replaced by a space.
      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    // Every remaining whitespace variant from the table becomes a plain space.
    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3648
3649
  /**
3650
   * Strip all whitespace characters. This includes tabs and newline
3651
   * characters, as well as multibyte whitespace such as the thin space
3652
   * and ideographic space.
3653
   *
3654
   * @param string $str
3655
   *
3656
   * @return string
3657
   */
3658 12
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    // Nothing to strip from an empty string.
    if ($input === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    // preg_replace() may return null on failure; the cast turns that into ''.
    return (string)$stripped;
  }
3668
3669
  /**
3670
   * Format a number with grouped thousands.
3671
   *
3672
   * @param float  $number
3673
   * @param int    $decimals
3674
   * @param string $dec_point
3675
   * @param string $thousands_sep
3676
   *
3677
   * @return string
3678
   *
3679
   * @deprecated Because this has nothing to do with UTF8. :/
3680
   */
3681
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $number = (float)$number;
    $dec_point = (string)$dec_point;
    $thousands_sep = (string)$thousands_sep;

    // isset($x[1]) is true when a separator is longer than one byte.
    $bothSeparatorsLong = isset($thousands_sep[1], $dec_point[1]);

    // Default path: let the native function apply the separators directly.
    if ($bothSeparatorsLong === false || Bootup::is_php('5.4') !== true) {
      return number_format($number, $decimals, $dec_point, $thousands_sep);
    }

    // Otherwise format with plain ASCII separators first, then swap in the requested ones.
    $formatted = number_format($number, $decimals, '.', ',');

    return str_replace(
        array('.', ','),
        array($dec_point, $thousands_sep),
        $formatted
    );
  }
3707
3708
  /**
3709
   * Calculates Unicode code point of the given UTF-8 encoded character.
3710
   *
3711
   * INFO: opposite to UTF8::chr()
3712
   *
3713
   * @param string      $chr      <p>The character of which to calculate code point.</p>
3714
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
3715
   *
3716
   * @return int <p>
3717
   *             Unicode code point of the given character,<br />
3718
   *             0 on invalid UTF-8 byte sequence.
3719
   *             </p>
3720
   */
3721 23
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // Per-character memo of already decoded code points.
    static $CHAR_CACHE = array();

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // Only convert when the normalized name is still something other than UTF-8.
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer IntlChar when available; a falsy answer falls through to the manual decoder.
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($chr);
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // Look at (at most) the first four bytes; unpack('C*') indexes them starting at 1.
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or no byte at all, in which case $lead is 0)
      $codePoint = $lead;
    }

    return $CHAR_CACHE[$chr] = $codePoint;
  }
3770
3771
  /**
3772
   * Parses the string into an array (into the second parameter).
3773
   *
3774
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3775
   *          if the second parameter is not set!
3776
   *
3777
   * @link http://php.net/manual/en/function.parse-str.php
3778
   *
3779
   * @param string  $str       <p>The input string.</p>
3780
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
3781
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3782
   *
3783
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3784
   */
3785 1
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    // Success needs both a non-false return value and a non-empty $result.
    return $parsed !== false && !empty($result);
  }
3799
3800
  /**
3801
   * Checks if \u modifier is available that enables Unicode support in PCRE.
3802
   *
3803
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3804
   */
3805 59
  public static function pcre_utf8_support()
  {
    // Probe: run an empty pattern with the "u" modifier against an empty subject.
    // Any warning is silenced; a falsy result is reported as "no support".
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    /** @noinspection UsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3811
3812
  /**
3813
   * Create an array containing a range of UTF-8 characters.
3814
   *
3815
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3816
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3817
   *
3818
   * @return array
3819
   */
3820 1
  public static function range($var1, $var2)
  {
    // Both boundaries are required; falsy values (including 0 and '0') yield an empty range.
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    // Inside this method the unqualified range() is PHP's native function.
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolves one boundary of UTF8::range() to a code point.
   *
   * Decimal digit strings are taken as the code point itself, other hexadecimal
   * strings go through hex_to_int(), anything else is treated as a character.
   *
   * @param mixed $boundary <p>Numeric or hexadecimal code point, or a UTF-8 character.</p>
   *
   * @return int <p>The code point; a falsy value means it could not be resolved.</p>
   */
  private static function rangeBoundaryToCodePoint($boundary)
  {
    // The ctype_*() functions are only well-defined for strings, so cast once up front.
    $asString = (string)$boundary;

    if (ctype_digit($asString)) {
      return (int)$boundary;
    }

    if (ctype_xdigit($asString)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
3858
3859
  /**
3860
   * Multi decode html entity & fix urlencoded-win1252-chars.
3861
   *
3862
   * e.g:
3863
   * 'test+test'                     => 'test+test'
3864
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3865
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3866
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3867
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3868
   * 'Düsseldorf'                   => 'Düsseldorf'
3869
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3870
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3871
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3872
   *
3873
   * @param string $str          <p>The input string.</p>
3874
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3875
   *
3876
   * @return string
3877
   */
3878 1 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" escapes are rewritten to hexadecimal HTML entities, which the
    // entity decoding in the loop below can then resolve.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Decode at least once; with $multi_decode keep going until the string stops changing.
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3909
3910
  /**
3911
   * alias for "UTF8::remove_bom()"
3912
   *
3913
   * @see UTF8::remove_bom()
3914
   *
3915
   * @param string $str
3916
   *
3917
   * @return string
3918
   *
3919
   * @deprecated
3920
   */
3921
  public static function removeBOM($str)
  {
    // Deprecated camelCase alias: simply delegates to remove_bom().
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3925
3926
  /**
3927
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
3928
   *
3929
   * @param string $str <p>The input string.</p>
3930
   *
3931
   * @return string <p>String without UTF-BOM</p>
3932
   */
3933 40
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // Only a BOM sitting at byte offset 0 is of interest.
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      // Cut the BOM off byte-wise; a failed substr() leaves nothing behind.
      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = $remainder === false ? '' : (string)$remainder;
    }

    return $str;
  }
3953
3954
  /**
3955
   * Removes duplicate occurrences of a string in another string.
3956
   *
3957
   * @param string          $str  <p>The base string.</p>
3958
   * @param string|string[] $what <p>String to search for in the base string.</p>
3959
   *
3960
   * @return string <p>The result string with removed duplicates.</p>
3961
   */
3962 1
  public static function remove_duplicates($str, $what = ' ')
  {
    // A single needle is handled like a one-element list.
    if (is_string($what) === true) {
      $what = array($what);
    }

    // Any other non-array value leaves $str untouched.
    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Replace each run with the captured group ('$1') instead of the raw $item:
        // preg_replace() interprets "$n" / "\n" inside the replacement string as
        // back-references, so a needle such as '$0' was replaced by the whole
        // match and never got de-duplicated. The group always holds exactly $item.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3977
3978
  /**
3979
   * Remove invisible characters from a string.
3980
   *
3981
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
3982
   *
3983
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
3984
   *
3985
   * @param string $str
3986
   * @param bool   $url_encoded
3987
   * @param string $replacement
3988
   *
3989
   * @return string
3990
   */
3991 57
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Percent-encoded hex digits may be upper- or lower-case ("%0B" and "%0b" are
      // the same byte), hence the "i" modifier; without it upper-case escapes slipped through.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127 (the raw byte is stripped below as well)
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until nothing is left to remove: deleting one sequence can bring the
    // pieces of another one together (e.g. "%0%0bb" turns into "%0b").
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4011
4012
  /**
4013
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4014
   *
4015
   * @param string $str                <p>The input string</p>
4016
   * @param string $replacementChar    <p>The replacement character.</p>
4017
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4018
   *
4019
   * @return string
4020
   */
4021 57
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // An empty replacement is passed to mbstring as the keyword 'none'.
      $substitute = $replacementChar === '' ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // Swap the global substitute character only for this conversion, then restore it.
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    $needles = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replacements = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($needles, $replacements, $str);
  }
4058
4059
  /**
4060
   * Strip whitespace or other characters from end of a UTF-8 string.
4061
   *
4062
   * @param string $str   <p>The string to be trimmed.</p>
4063
   * @param string $chars <p>Optional characters to be stripped.</p>
4064
   *
4065
   * @return string <p>The string with unwanted characters stripped from the right.</p>
4066
   */
4067 23 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Without an explicit character list, strip Unicode separators (\pZ) and
    // "other" characters such as controls (\pC) from the end of the string.
    // "0" is falsy in PHP, so it is exempted here: otherwise rtrim('100', '0')
    // would fall back to whitespace trimming instead of stripping the zeros.
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    // rxClass() turns the character list into a regex fragment anchored at the end here.
    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
4082
4083
  /**
4084
   * rxClass
4085
   *
4086
   * @param string $s
4087
   * @param string $class
4088
   *
4089
   * @return string
4090
   */
4091 60
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // $parts[0] collects the members of one bracket expression;
    // any further entries become separate alternatives.
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // A dash is moved to the very front of the bracket expression.
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // At most two bytes long: escape for use inside a "/"-delimited pattern.
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // Three or more bytes but a single character: add it verbatim.
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $return = 1 === count($parts) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    return $RX_CLASSS_CACHE[$cacheKey] = $return;
  }
4131
4132
  /**
4133
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
4134
   */
4135
  public static function showSupport()
  {
    // Make sure the support table has been filled before printing it.
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    // One entry per line; only the values are printed, not their keys.
    foreach (self::$SUPPORT as $utf8Support) {
      echo $utf8Support, "\n<br>";
    }
  }
4145
4146
  /**
4147
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
4148
   *
4149
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
4150
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
4151
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
4152
   *
4153
   * @return string <p>The HTML numbered entity.</p>
4154
   */
4155 1
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if (!isset($char[0])) {
      return '';
    }

    // ASCII characters are passed through untouched when the caller asks for it.
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4177
4178
  /**
   * Break a string into an array of UTF-8 characters, optionally grouped into chunks.
   *
   * @param string  $str       <p>The string to split.</p>
   * @param int     $length    [optional] <p>Characters per array element; only values above 1 cause chunking.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the input through UTF8::clean() before splitting.</p>
   *
   * @return string[] <p>The characters or chunks; an empty array for an empty input.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // One match per character; the "s" modifier lets "." match newlines too.
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // Fallback: walk the raw bytes and assemble each sequence by hand.

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // The byte count is needed, so bypass an overloaded strlen().
      $byteCount = (self::$SUPPORT['mbstring_func_overload'] === true) ? \mb_strlen($str, '8BIT') : strlen($str);

      $pos = 0;
      while ($pos < $byteCount) {
        $lead = $str[$pos];

        // Bytes that do not start a complete, well-formed sequence are skipped one at a time.
        $step = 1;

        if (($lead & "\x80") === "\x00") {

          // single-byte (ASCII) character
          $chars[] = $lead;

        } elseif (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {

          // two-byte sequence
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];
            $step = 2;
          }

        } elseif (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {

          // three-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];
            $step = 3;
          }

        } elseif (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {

          // four-byte sequence
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];
            $step = 4;
          }

        }

        $pos += $step;
      }
    }

    // Group the characters into chunks of $length when requested.
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4302
4303
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list and finally an
   * "iconv()" round-trip over self::$ICONV_ENCODING.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // Each detector is called once and its result reused for the LE / BE decision.
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // An encoding is accepted when converting the string to itself leaves it unchanged.
    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      // INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4403
4404
  /**
   * Tell whether $haystack ends with $needle (case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring expected at the end.</p>
   *
   * @return bool <p><strong>false</strong> as well when either argument is an empty string.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Take as many trailing characters as the needle is long and compare them.
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $tail === $needle;
  }
4432
4433
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> as well when either argument is an empty string.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // self::substr() is checked for "false" here, exactly like in str_ends_with(),
    // instead of passing a non-string on to the comparison.
    $haystackSub = self::substr($haystack, -self::strlen($needle));
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4456
4457
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search terms are turned into quoted, case-insensitive UTF-8 patterns and
   * applied via preg_replace(). Replacement values are inserted literally:
   * "$" and "\" are escaped so they are never read as back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement string, or an array of replacement strings.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build one pattern per search term (no by-reference loop, so no dangling reference).
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term .= '';

      if ($term === '') {
        // An empty search term must not match anything: this pattern can never match.
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // preg_replace() interprets "$n" and "\n" in the replacement; escape them to keep the text literal.
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replacement = strtr((string)$replace, $escapeMap);
    }

    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4500
4501
  /**
   * Tell whether $haystack begins with $needle, ignoring case.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring expected at the start.</p>
   *
   * @return bool <p><strong>false</strong> as well when either argument is an empty string.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // A prefix match means the first case-insensitive hit is at position 0.
    return self::stripos($haystack, $needle) === 0;
  }
4524
4525
  /**
   * Shorten a string to at most $length characters, cutting at a space where
   * possible, and append $strAddOn to the shortened result.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Maximum number of characters to keep.</p>
   * @param string $strAddOn <p>Suffix appended whenever the string was shortened.</p>
   *
   * @return string <p>The input itself if it is short enough, otherwise the shortened string plus suffix.</p>
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // Short enough already: nothing to cut, no suffix.
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // The limit falls directly on a space: cut right before it.
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // Otherwise cut hard at the limit and drop the trailing (partial) word.
    $cut = (string)self::substr($str, 0, $length);
    $head = implode(' ', array_slice(explode(' ', $cut), 0, -1));

    if ($head === '') {
      // No space inside the limit: keep the hard cut, minus one character.
      return (string)self::substr($cut, 0, $length - 1) . $strAddOn;
    }

    return $head . $strAddOn;
  }
4565
4566
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when $pad_length is not an integer, is not
   * positive, is smaller than the current length, or when $pad_string is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // An empty pad string cannot fill anything and would cause a division by zero below.
    if ($ps_length < 1) {
      return $str;
    }

    // number of characters that have to be added
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // The left side gets the smaller half, the right side the larger half.
        // The division happens before the int-conversion, so the length is a real integer.
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $pre = (string)self::substr($pre, 0, (int)floor($diff / 2));
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
        $post = (string)self::substr($post, 0, (int)ceil($diff / 2));
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4620
4621
  /**
   * Repeat a string a given number of times.
   *
   * The input is passed through UTF8::filter() first; the repetition itself is
   * done by the native str_repeat().
   *
   * @param string $str        <p>The string to be repeated.</p>
   * @param int    $multiplier <p>
   *                           How often the string is repeated; must be greater than
   *                           or equal to 0. With 0 an empty string is returned.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4645
4646
  /**
   * Replace all occurrences of the search string with the replacement string.
   *
   * INFO: thin wrapper around the native "str_replace()", which is already UTF-8 safe.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The needle to look for; an array designates several needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The value put in place of each found needle; an array
   *                       designates several replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The haystack: a string, or an array of strings. For an
   *                       array every entry is processed and an array is returned.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>A string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4679
4680
  /**
   * Replace only the first occurrence of "$search" with "$replace".
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The text put in its place.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject with the first hit replaced, or the unchanged subject if there is none.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    if ($firstHit === false) {
      return $subject;
    }

    // Replace exactly the span covered by the search term.
    return self::substr_replace($subject, $replace, $firstHit, self::strlen($search));
  }
4699
4700
  /**
   * Randomly reorder the characters of a string.
   *
   * The string is split with UTF8::split(), so multi-byte characters stay intact.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);

    shuffle($chars);

    return implode('', $chars);
  }
4715
4716
  /**
   * Sort the characters of a string by their code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>If <strong>true</strong>, each character is kept only once.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, sort from highest to lowest code point.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // Flipping twice removes duplicate values.
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4741
4742
  /**
   * Split a string into an array of grapheme clusters, or groups of them.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of clusters per array element.</p>
   *
   * @return array <p>
   *               The pieces of the string; an empty array for an empty input.
   *               For a length below 1 the call is delegated to the native str_split().
   *               </p>
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    // Let the native function deal with an invalid length.
    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // Glue every run of $len consecutive clusters into one element.
    $pieces = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $pieces[] = implode('', $group);
    }

    return $pieces;
  }
4786
4787
  /**
   * Tell whether $haystack begins with $needle (case sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring expected at the start.</p>
   *
   * @return bool <p><strong>false</strong> as well when either argument is an empty string.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // A prefix match means the first hit is at position 0.
    return self::strpos($haystack, $needle) === 0;
  }
4810
4811
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number and written in base 2,
   * without leading zeros. The conversion works byte by byte, so the result is
   * exact for strings of any length.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" / "1" digits; "0" for an empty input or all-zero bytes.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '0';
    }

    // unpack('C*') yields the value (0-255) of every byte of the string.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // Leading zeros carry no value in a number, so they are dropped.
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4826
4827
  /**
   * Split a string into words and the text between them.
   *
   * The string is split with a capturing pattern, so the result contains the
   * separators between words as well as the words themselves.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Drop entries that are empty after trim().</p>
   * @param null|int $removeShortValues <p>Drop entries with at most this many characters; null disables it.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return ($removeEmptyValues === true) ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // Without any filter option the raw split result is returned.
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = array();
    foreach ($parts as $part) {
      $tooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      $blank = $removeEmptyValues === true && trim($part) === '';
      if ($blank) {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4888
4889
  /**
   * alias for "UTF8::to_ascii()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4904
4905
  /**
   * Count the words of a UTF-8 string, or list them.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br />
   *                         <strong>1</strong> => return an array of words<br />
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words, or the words themselves, depending on $format.</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // The words sit at the odd indexes of this list, the text between them at the even ones.
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    if ($format === 1) {
      $words = array();
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();

      // The first word starts after the leading separator text.
      $offset = self::strlen($parts[0]);
      for ($idx = 1; $idx < $total; $idx += 2) {
        $words[$offset] = $parts[$idx];
        $offset += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    return ($total - 1) / 2;
  }
4948
4949
  /**
   * Compare two strings while ignoring case.
   *
   * INFO: both strings are case-folded and then handed to UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>negative</strong> if str1 is less than str2;<br />
   *             <strong>positive</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4967
4968
  /**
   * alias for "UTF8::strstr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
4985
4986
  /**
   * Compare two strings, case sensitive.
   *
   * Strings that are identical byte for byte are equal right away; otherwise
   * both are normalized to NFD and compared with the native strcmp().
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>negative</strong> if str1 is less than str2<br />
   *              <strong>positive</strong> if str1 is greater than str2<br />
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // fast path: no normalization needed for identical strings
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5006
5007
  /**
   * Length of the leading part of a string that contains none of the given characters.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the leading segment.</p>
   * @param int    $offset   <p>Start position inside the string.</p>
   * @param int    $length   <p>Length of the part of the string to examine.</p>
   *
   * @return int|null <p>
   *                  The segment length in characters, or null for an empty
   *                  char list, an empty string or a failed substring.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // Narrow the string down first when an offset / length is given.
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // Lazily capture everything in front of the first character from the list.
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    // None of the characters occurs: the whole string is the segment.
    return self::strlen($str);
  }
5043
5044
  /**
   * alias for "UTF8::stristr()"
   *
   * All arguments are forwarded unchanged.
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5061
5062
  /**
   * Build a UTF-8 string out of a list of code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    // Convert every code point with UTF8::chr() and append it in order.
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5084
5085
  /**
   * Tell whether a string begins with one of the known "BOM" (Byte Order Mark) sequences.
   *
   * The candidates are the keys of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$BOM) as $bom) {
      // position 0 means the string starts with this BOM
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5102
5103
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags listed here are not stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are always stripped; this
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Clean only on request, then let the native function do the stripping.
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
5137
5138
  /**
   * Find the position of the first occurrence of a string in another one, ignoring case.
   *
   * Uses "grapheme_stripos()" for UTF-8 when the intl extension is available,
   * otherwise "mb_stripos()".
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br />
   *                   or false if needle is not found or one of the strings is empty.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   &&
                   self::$SUPPORT['intl'] === true
                   &&
                   Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5198
5199
  /**
   * Case-insensitive variant of strstr(): returns the part of haystack that starts at the
   * first occurrence of needle, or the part in front of it.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    // an empty haystack or an empty needle can never produce a match
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: "grapheme_stristr()" can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
5272
5273
  /**
   * Get the string length, not the byte-length!
   *
   * For the encodings "ASCII" and "CP850" the byte length is returned directly; for every
   * other encoding the first available backend is used in this order: mbstring, iconv,
   * intl (UTF-8 only), a regex based count and finally the "mb_" polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return 0;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The feature flags have to be populated before any of them is read; this includes
    // the "mbstring_func_overload" lookup in the byte-length branch right below.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        // the byte length is returned for these encodings
        if (
            $encoding === 'CP850'
            &&
            self::$SUPPORT['mbstring_func_overload'] === false
        ) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    // without mbstring, iconv is the next choice for every encoding
    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $parts);
    $returnTmp = count($parts[0]);
    if ($returnTmp !== 0) {
      return $returnTmp;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5375
5376
  /**
   * Case-insensitive "natural order" string comparison.
   *
   * INFO: natural order version of UTF8::strcasecmp(); both strings are case-folded
   *       and then compared via UTF8::strnatcmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5392
5393
  /**
   * "Natural order" string comparison.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5411
5412
  /**
   * Case-insensitive comparison of the first n characters of two strings.
   *
   * Both strings are case-folded and then compared via UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5429
5430
  /**
   * Comparison of the first n characters of two strings.
   *
   * Both strings are cut down to their first $len characters via UTF8::substr()
   * and the results are compared via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // the string cast turns a failed substr() into an empty string
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5450
5451
  /**
   * Search a string for any character out of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false The haystack starting from the first character found,
   *                      or false if no character was found or one of the inputs is empty.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // match the first character that is part of the character class built from $char_list
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $hit)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched character
    return substr($haystack, strpos($haystack, $hit[0]));
  }
5476
5477
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If needle is not found it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support an integer $needle, so a code point is converted
    // into its character; this has to happen before the string cast below, because
    // afterwards the needle can never be an integer anymore
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither mbstring nor iconv is available for a non UTF-8 encoding
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // A negative offset counts from the end of the string: translate it into the
      // equivalent absolute character position, so that the returned position stays
      // relative to the start of the original haystack.
      $offset = max(0, $offset + self::strlen($haystack));
    }

    $haystackTmp = self::substr($haystack, $offset);
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // convert the byte position inside the shortened haystack into a character position
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5613
5614
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around "\mb_strrchr()" that normalizes the encoding name and can
   * optionally clean both strings first.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack is returned.
   *                              If set to true, all of haystack from the beginning
   *                              to the last occurrence of needle is returned.
   *                              If set to false, all of haystack from the last
   *                              occurrence of needle to the end is returned.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding name needs to be normalized
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
5653
5654
  /**
   * Reverses the order of the characters in the string.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split the string into its parts, flip their order and glue them back together
    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5671
5672
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around "\mb_strrichr()" that normalizes the encoding name and can
   * optionally clean both strings first.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack is returned.
   *                               If set to true, all of haystack from the beginning
   *                               to the last occurrence of needle is returned.
   *                               If set to false, all of haystack from the last
   *                               occurrence of needle to the end is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only a non-default encoding name needs to be normalized
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5710
5711
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If needle is
   *                   not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as a code point and converted into its character
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    $encodingIsBool = $encoding === true || $encoding === false;

    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = $encodingIsBool ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both strings and do a case-sensitive search
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5791
5792
  /**
   * Find position of last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If needle
   *                   is not found, it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is treated as a code point and converted into its character
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    $encodingIsBool = $encoding === true || $encoding === false;

    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = $encodingIsBool ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $hasMbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $hasMbstring === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($hasMbstring === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strrpos()" can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset !== 0) {
      if ($offset > 0) {
        // skip the first $offset characters; they are added back to the result below
        $slice = self::substr($haystack, $offset);
      } else {
        // drop the last characters, positions stay relative to the start of the string
        $slice = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      if ($slice !== null) {
        $haystack = $slice === false ? '' : (string)$slice;
      }
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position into a character position
    return $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
5894
5895
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start of the part of $str that is examined, passed on to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the part of $str that is examined, passed on to UTF8::substr().</p>
   *
   * @return int <p>The length of the matching initial segment, 0 if there is none.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // narrow the input down to the requested part first
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      $str = $part === false ? '' : (string)$part;
    }

    $str = (string)$str;
    if ($str === '' || !isset($mask[0])) {
      return 0;
    }

    // match the longest run of mask characters that is anchored at the start of the string
    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (!preg_match($pattern, $str, $run)) {
      return 0;
    }

    return self::strlen($run[0]);
  }
5923
5924
  /**
   * Returns the part of the haystack from the first occurrence of the needle to the end of the haystack.
   *
   * Backends are tried in this order: mbstring, intl (UTF-8 only, PHP >= 5.4), and finally a
   * regular-expression based fallback.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first occurrence
   *                               of the needle is returned (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br />or <strong>false</strong> if the needle is not found
   *                      or if the haystack or the needle is empty.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences are stripped from both operands before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // preferred backend: mbstring
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // the remaining backends only understand UTF-8
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // second choice: intl, which can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // last resort: lazily capture everything in front of the first occurrence of the needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    if ($before_needle) {
      return $found[1];
    }

    // skip the captured prefix and return the rest (starting with the needle)
    return self::substr($haystack, self::strlen($found[1]));
  }
5996
5997
  /**
   * Unicode transformation for case-less matching.
   *
   * The common case-fold table is always applied, the full table only on request; the result is
   * finally passed through UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br />
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string (done after the
   *                           replacements and before lowercasing).</p>
   *
   * @return string <p>The folded string, or an empty string for empty input.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // search / replace lists of the common table are built once per request
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {

      // the full table is loaded lazily and kept for later calls
      static $fullFoldTable = null;

      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6048
6049
  /**
   * Make a string lowercase.
   *
   * Without $lang the work is delegated to "\mb_strtolower()". With $lang the intl transliterator
   * "<lang>-Lower" is used when intl and PHP >= 5.4 are available ($encoding is not used on that path).
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // language specific lowercasing needs the intl transliterator
    if (
        self::$SUPPORT['intl'] !== true
        ||
        Bootup::is_php('5.4') !== true
    ) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language: fall back to the generic transliterator
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6106
6107
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) via the intl "Normalizer" and every run of non-spacing marks
   * (\p{Mn}) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6119
6120
  /**
   * Make a string uppercase.
   *
   * Without $lang the work is delegated to "\mb_strtoupper()". With $lang the intl transliterator
   * "<lang>-Upper" is used when intl and PHP >= 5.4 are available ($encoding is not used on that path).
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtoupper($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // language specific uppercasing needs the intl transliterator
    if (
        self::$SUPPORT['intl'] !== true
        ||
        Bootup::is_php('5.4') !== true
    ) {
      // the warning has to name this method (it used to report "UTF8::strtolower()")
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtoupper($str, $encoding);
    }

    $transliteratorId = $lang . '-Upper';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language: fall back to the generic transliterator
      $transliteratorId = 'Any-Upper';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6176
6177
  /**
   * Translate characters or replace sub-strings.
   *
   * - With $to given, $from and $to are split into characters (UTF8::str_split()) and paired up
   *   position by position; surplus characters of the longer list are ignored.
   * - Without $to, an array $from is used as a "search => replace" map for the native strtr(),
   *   while a string $from is simply removed from $str.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // nothing to translate if source and target are identical
    if ($from === $to) {
      return $str;
    }

    // INF is the "argument not passed" marker
    if ($to !== INF) {
      $from = self::str_split($from);
      $to = self::str_split($to);

      $fromCount = count($from);
      $toCount = count($to);

      // array_combine() needs lists of equal size, so cut both down to the shorter one
      if ($fromCount !== $toCount) {
        $pairCount = min($fromCount, $toCount);
        $from = array_slice($from, 0, $pairCount);
        $to = array_slice($to, 0, $pairCount);
      }

      $from = array_combine($from, $to);
    }

    // a plain string without a target is removed from the input
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6224
6225
  /**
   * Return the width of a string, as reported by "\mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    // iconv and mbstring are not tolerant to invalid encoding
    $subject = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($subject, $charset);
  }
6249
6250
  /**
   * Changes the case of all keys in an array.
   *
   * If two keys become identical after the conversion, the value of the later one wins.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br />
   *                     or <strong>CASE_LOWER</strong> (default); every other value is handled
   *                     like <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // only an exact CASE_LOWER lowercases, everything else uppercases
    $toLower = ($case === CASE_LOWER);

    $converted = array();
    foreach ($array as $originalKey => $item) {
      $newKey = $toLower ? self::strtolower($originalKey) : self::strtoupper($originalKey);

      $converted[$newKey] = $item;
    }

    return $converted;
  }
6287
6288
  /**
   * Get part of a string.
   *
   * Backends are tried in this order: native substr() (CP850 only), mbstring, intl (UTF-8 only,
   * PHP >= 5.4), iconv (non-negative length only), and finally a pure PHP fallback that
   * works on an array of characters.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.</p><p>An empty <i>str</i>
   *                      always results in an empty string.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // the length is only needed for the offset check and as default for $length
    $charCount = 0;
    if ($offset || $length === null) {
      $charCount = (int)self::strlen($str, $encoding);
    }

    if ($offset && $offset > $charCount) {
      return false;
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    $wantsUtf8 = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $wantsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut with the native function, as long as mbstring does not overload it
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return substr($str, $offset, $length);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // the remaining backends only understand UTF-8
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // intl can't handle other encodings than UTF-8
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if (
        $length >= 0
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_substr($str, $offset, $length);
    }

    // fallback via vanilla php:
    // split into an array of characters, take the relevant slice and glue it together again
    $chars = self::split($str);

    return implode('', array_slice($chars, $offset, $length));
  }
6393
6394
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is reduced to that window and $str2 is cut down to the
   * length of the reduced $str1 before both are compared.
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $windowRequested = !($offset === 0 && $length === null);

    if ($windowRequested) {
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : (string)$mainPart;

      // compare against an equally long prefix of the second string
      $otherPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($otherPart === false) ? '' : (string)$otherPart;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
6438
6439
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring.
   *                           </p>
   * @param string  $encoding  <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if the haystack or the needle is empty
   *                   (before PHP 7.1 also for a non-zero offset and length that sum up to zero or less).</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        $length = (int)self::strlen($haystack);
      }

      $offset = (int)$offset;
      $length = (int)$length;

      // output from "substr_count()" have changed in PHP 7.1
      $bothNonZero = ($length !== 0 && $offset !== 0);
      if (
          $bothNonZero
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false
      ) {
        return false;
      }

      // only the requested window of the haystack is searched
      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences are stripped from both operands before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // the regex fallback below only understands UTF-8
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // fallback: collect every (non-overlapping) match and count them
    $occurrences = array();
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return count($occurrences);
  }
6528
6529
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle (or if the needle is empty).</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle or no case-insensitive prefix match
    if (
        !isset($needle[0])
        ||
        self::str_istarts_with($haystack, $needle) !== true
    ) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6561
6562
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle (or if the needle is empty).</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle or no case-insensitive suffix match
    if (
        !isset($needle[0])
        ||
        self::str_iends_with($haystack, $needle) !== true
    ) {
      return $haystack;
    }

    // keep everything except the last strlen($needle) characters
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6594
6595
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle (or if the needle is empty).</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip: empty needle or no prefix match
    if (
        !isset($needle[0])
        ||
        self::str_starts_with($haystack, $needle) !== true
    ) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6627
6628
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * If $str is an array, every element is processed on its own: $replacement, $offset and $length
   * are expanded (or cut) to one entry per element first, then this method is called again for
   * each element.
   *
   * NOTE: in the array form an omitted $length is handled as 0 (pure insertion), while in the
   * string form an omitted $length means "up to the end of the string".
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br /><br />
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. Of course, if length is zero then this function will
   *                                          have the effect of inserting replacement into string at the given
   *                                          start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // the replacement: one entry per input string
      $replacement = (is_array($replacement) === true)
          ? array_slice($replacement, 0, $num)
          : array_pad(array($replacement), $num, $replacement);

      // the offset: non-integer entries are replaced by 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as $offsetKey => $offsetValue) {
          $offset[$offsetKey] = ((int)$offsetValue === $offsetValue) ? $offsetValue : 0;
        }
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length: missing entries become 0, non-integer entries become $num
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as $lengthKey => $lengthValue) {
          if (!isset($lengthValue)) {
            $length[$lengthKey] = 0;
          } else {
            $length[$lengthKey] = ((int)$lengthValue === $lengthValue) ? $lengthValue : $num;
          }
        }
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single input string only uses the first replacement
    if (is_array($replacement) === true) {
      $replacement = (count($replacement) > 0) ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // work on arrays of characters, so that offsets count characters and not bytes
    preg_match_all('/./us', $str, $strMatches);
    preg_match_all('/./us', $replacement, $replacementMatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($strMatches[0], $offset, $length, $replacementMatches[0]);

    return implode('', $strMatches[0]);
  }
6724
6725
  /**
6726
   * Removes a suffix ($needle) from the end of the string ($haystack).
6727
   *
6728
   * @param string $haystack <p>The string to search in.</p>
6729
   * @param string $needle   <p>The substring to search for.</p>
6730
   *
6731
   * @return string <p>Return the sub-string.</p>
6732
   */
6733 1 View Code Duplication
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip
    if ($haystack === '') {
      return '';
    }

    // an empty needle, or a haystack that does not end with the needle, is returned unchanged
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $withoutSuffix = self::substr($haystack, 0, self::strlen($haystack) - self::strlen($needle));

    // substr() may report false; that is turned into an empty string
    return $withoutSuffix === false ? '' : (string)$withoutSuffix;
  }
6756
6757
  /**
6758
   * Returns a case swapped version of the string.
6759
   *
6760
   * @param string  $str       <p>The input string.</p>
6761
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6762
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6763
   *
6764
   * @return string <p>Each character's case swapped.</p>
6765
   */
6766 1
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // nothing to swap in an empty string
    if ($str === '') {
      return '';
    }

    // any charset other than the default is passed through normalize_encoding()
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // optionally run the input through clean() first
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Upper-case the matched character; if that changes nothing it already was
    // upper-case (or has no case), so the lower-case form is returned instead.
    // The closure addresses the class by name, not via "self::".
    $swapSingleChar = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    // every non-whitespace character is handed to the callback
    return preg_replace_callback('/[\S]/u', $swapSingleChar, $str);
  }
6800
6801
  /**
6802
   * alias for "UTF8::to_ascii()"
6803
   *
6804
   * @see UTF8::to_ascii()
6805
   *
6806
   * @param string $s
6807
   * @param string $subst_chr
6808
   * @param bool   $strict
6809
   *
6810
   * @return string
6811
   *
6812
   * @deprecated
6813
   */
6814
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    // deprecated camelCase alias: all arguments are forwarded unchanged to to_ascii()
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6818
6819
  /**
6820
   * alias for "UTF8::to_iso8859()"
6821
   *
6822
   * @see UTF8::to_iso8859()
6823
   *
6824
   * @param string $str
6825
   *
6826
   * @return string|string[]
6827
   *
6828
   * @deprecated
6829
   */
6830
  public static function toIso8859($str)
  {
    // deprecated camelCase alias: the argument is forwarded unchanged to to_iso8859()
    $iso = self::to_iso8859($str);

    return $iso;
  }
6834
6835
  /**
6836
   * alias for "UTF8::to_latin1()"
6837
   *
6838
   * @see UTF8::to_latin1()
6839
   *
6840
   * @param string|string[] $str
6841
   *
6842
   * @return string
6843
   *
6844
   * @deprecated
6845
   */
6846
  public static function toLatin1($str)
  {
    // deprecated camelCase alias: the argument is forwarded unchanged to to_latin1()
    $latin1 = self::to_latin1($str);

    return $latin1;
  }
6850
6851
  /**
6852
   * alias for "UTF8::to_utf8()"
6853
   *
6854
   * @see UTF8::to_utf8()
6855
   *
6856
   * @param string $str
6857
   *
6858
   * @return string
6859
   *
6860
   * @deprecated
6861
   */
6862
  public static function toUTF8($str)
  {
    // deprecated camelCase alias: the argument is forwarded unchanged to to_utf8()
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
6866
6867
  /**
6868
   * Convert a string into ASCII.
6869
   *
6870
   * @param string $str     <p>The input string.</p>
6871
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
6872
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6873
   *                        performance</p>
6874
   *
6875
   * @return string
6876
   */
6877 21
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // Transliteration tables, kept between calls. They are loaded lazily via
    // getData() and keyed by "bank" (code point >> 8).
    static $UTF8_TO_ASCII = array();

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace('ℌ', 'H', $str);

        // transliterator_transliterate() reports failure with false; in that
        // case keep the current string so the table lookup below still runs on it
        // instead of on a boolean.
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }
      }
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // plain ASCII stays as it is
      if ($ordC0 <= 127) {
        continue;
      }

      // Decode the code point from the lead byte and its continuation bytes.
      // $ord is reset for every character, so a byte that is no valid lead byte
      // (128-191, 254, 255) can never pick up the code point of the previous
      // character; it is replaced by $unknown below.
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        // 3 bytes
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        // 4 bytes
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // 5 bytes
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 === 252 || $ordC0 === 253) {
        // 6 bytes
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the upper bits select the table ("bank"), the low 8 bits the entry in it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the missing table as empty, so it is requested only once
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no transliteration known for this character
        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference foreach
    unset($c);

    return implode('', $chars);
  }
7027
7028
  /**
7029
   * Convert a string into "ISO-8859"-encoding (Latin-1).
7030
   *
7031
   * @param string|string[] $str
7032
   *
7033
   * @return string|string[]
7034
   */
7035 3
  public static function to_iso8859($str)
  {
    // arrays are converted entry by entry; keys and their order are kept
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    // an empty string needs no decoding
    if ($str === '') {
      return '';
    }

    return self::utf8_decode($str);
  }
7057
7058
  /**
7059
   * alias for "UTF8::to_iso8859()"
7060
   *
7061
   * @see UTF8::to_iso8859()
7062
   *
7063
   * @param string|string[] $str
7064
   *
7065
   * @return string|string[]
7066
   */
7067 1
  public static function to_latin1($str)
  {
    // alias: the argument is forwarded unchanged to to_iso8859()
    $latin1 = self::to_iso8859($str);

    return $latin1;
  }
7071
7072
  /**
7073
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
7074
   *
7075
   * <ul>
7076
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
7077
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
7078
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
7079
   * case.</li>
7080
   * </ul>
7081
   *
7082
   * @param string|string[] $str                    <p>Any string or array.</p>
7083
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
7084
   *
7085
   * @return string|string[] <p>The UTF-8 encoded string.</p>
7086
   */
7087 22
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted entry by entry; keys and their order are kept
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // number of bytes (not characters), even if mbstring overloads strlen()
    $max = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    $buf = '';
    $pos = 0;

    while ($pos < $max) {
      $lead = $str[$pos];

      // below 0xC0: either a stray continuation byte (10xxxxxx), which needs
      // conversion, or a byte that is copied as it is
      if ($lead < "\xC0") {
        $buf .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert($lead) : $lead;
        $pos++;
        continue;
      }

      // how many continuation bytes would this lead byte announce in UTF-8?
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; a missing byte (end of string) counts as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $tailLength > 0;
      for ($k = 1; $k <= $tailLength; $k++) {
        $next = $pos + $k >= $max ? "\x00" : $str[$pos + $k];

        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
          break;
        }

        $sequence .= $next;
      }

      if ($looksLikeUtf8 === true) {
        // yeah, almost sure it's UTF8 already: copy the whole sequence
        $buf .= $sequence;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert only the lead byte, the next byte is examined on its own
        $buf .= self::to_utf8_convert($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences ("\uXXXX")
    $decodeEscape = function ($hit) {
      return \mb_convert_encoding(pack('H*', $hit[1]), 'UTF-8', 'UCS-2BE');
    };
    $buf = preg_replace_callback('/\\\\u([0-9a-f]{4})/i', $decodeEscape, $buf);

    // optionally decode html-entities as well
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7189
7190
  /**
7191
   * @param string $int <p>A single byte; despite its name the parameter is a one-character string, not an integer.</p>
7192
   *
7193
   * @return string
7194
   */
7195 16
  private static function to_utf8_convert($int)
  {
    // $int is used as a one-byte string here: ord() yields its byte value
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return '' . self::$WIN1252_TO_UTF8[$byteValue];
    }

    // otherwise build a two-byte sequence: 110xxxxx from the upper bits
    // of the byte, 10xxxxxx from its lower six bits
    $firstByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $secondByte = ($int & "\x3F") | "\x80";

    return '' . $firstByte . $secondByte;
  }
7210
7211
  /**
7212
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
7213
   *
7214
   * INFO: This is slower than "trim()"
7215
   *
7216
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
7217
   * but the check for ASCII (7-Bit) costs more time than we can save here.
7218
   *
7219
   * @param string $str   <p>The string to be trimmed</p>
7220
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
7221
   *
7222
   * @return string <p>The trimmed string.</p>
7223
   */
7224 26
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "No character list" means: the INF default or an explicitly empty value.
    // This is checked strictly on purpose - a loose "!$chars" would also match
    // the list "0" (or the number 0) and silently fall back to whitespace trimming.
    $useDefaultChars = $chars === INF
                       || $chars === null
                       || $chars === false
                       || $chars === ''
                       || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($useDefaultChars === true) {
      // strip separators (\pZ) and "other"/control characters (\pC) from both ends
      return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
    }

    // NOTE(review): ltrim() / rtrim() are not visible from here - confirm that
    // they also accept "0" as a character list.
    return self::rtrim(self::ltrim($str, $chars), $chars);
  }
7239
7240
  /**
7241
   * Makes string's first char uppercase.
7242
   *
7243
   * @param string  $str       <p>The input string.</p>
7244
   * @param string  $encoding  [optional] <p>Set the charset.</p>
7245
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7246
   *
7247
   * @return string <p>The resulting string</p>
7248
   */
7249 14 View Code Duplication
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character is kept untouched
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($tail === false) {
      // substr() reported false: treat as "no remainder"
      $tail = '';
    }

    // the first character on its own, converted to upper-case
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
7264
7265
  /**
7266
   * alias for "UTF8::ucfirst()"
7267
   *
7268
   * @see UTF8::ucfirst()
7269
   *
7270
   * @param string  $word
7271
   * @param string  $encoding
7272
   * @param boolean $cleanUtf8
7273
   *
7274
   * @return string
7275
   */
7276 1
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // alias: all arguments are forwarded unchanged to ucfirst()
    $upperCased = self::ucfirst($word, $encoding, $cleanUtf8);

    return $upperCased;
  }
7280
7281
  /**
7282
   * Uppercase for all words in the string.
7283
   *
7284
   * @param string   $str        <p>The input string.</p>
7285
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
7286
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
7287
   * @param string   $encoding   [optional] <p>Set the charset.</p>
7288
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
7289
   *
7290
   * @return string
7291
   */
7292 8 View Code Duplication
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Only really empty input short-circuits. A loose "!$str" would also
    // swallow the valid input "0".
    if ($str === '' || $str === null || $str === false || $str === array()) {
      return '';
    }

    // split into pieces; the pieces are glued together again without a
    // separator at the end, so every piece has to survive the loop
    $words = self::str_to_words($str, $charlist);
    $newWords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // Skip empty pieces only. A loose "!$word" would drop a piece that is
      // exactly "0" and thereby remove it from the result.
      if ($word === '' || $word === null || $word === false) {
        continue;
      }

      // upper-case the first character, unless the word is listed as an exception
      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7330
7331
  /**
7332
   * Multi decode html entity & fix urlencoded-win1252-chars.
7333
   *
7334
   * e.g:
7335
   * 'test+test'                     => 'test test'
7336
   * 'D&#252;sseldorf'               => 'Düsseldorf'
7337
   * 'D%FCsseldorf'                  => 'Düsseldorf'
7338
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
7339
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
7340
   * 'Düsseldorf'                   => 'Düsseldorf'
7341
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
7342
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
7343
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
7344
   *
7345
   * @param string $str          <p>The input string.</p>
7346
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
7347
   *
7348
   * @return string
7349
   */
7350 1 View Code Duplication
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" sequences are rewritten as hexadecimal html-entities,
    // which the loop below decodes
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode once, or - with $multi_decode - until a pass changes nothing anymore
    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
7381
7382
  /**
7383
   * Returns an array that maps "urlencoded" win1252 characters to UTF-8.
7384
   *
7385
   * @deprecated use the "UTF8::urldecode()" function to decode a string
7386
   *
7387
   * @return array
7388
   */
7389
  public static function urldecode_fix_win1252_chars()
  {
    // Static lookup table: percent-encoded Windows-1252 byte ("%20" .. "%FF")
    // => the corresponding character as UTF-8.
    //
    // "%80" maps to the EURO SIGN, the character Windows-1252 defines at 0x80.
    //
    // NOTE(review): "%7F", "%A0" and "%AD" map to an empty string here, although
    // Windows-1252 defines DELETE, NO-BREAK SPACE and SOFT HYPHEN at these
    // positions - confirm whether that is intended.
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7618
7619
  /**
7620
   * Decodes an UTF-8 string to ISO-8859-1.
7621
   *
7622
   * @param string $str <p>The input string.</p>
7623
   *
7624
   * @return string
7625
   */
7626 6
  public static function utf8_decode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // run the input through to_utf8() before stepping through its bytes
    $str = (string)self::to_utf8($str);

    // key / value lists of the UTF-8 => Windows-1252 table, built on the first call and kept afterwards
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    // replace the sequences listed in the table first
    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // number of bytes (not characters), even if mbstring overloads strlen()
    $len = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    // The string is rewritten in place: $read walks over the input bytes,
    // $write (never ahead of $read) marks where the next output byte goes.
    $read = 0;
    $write = 0;
    while ($read < $len) {
      // the upper four bits of the byte tell which kind of sequence starts here
      $leadBits = $str[$read] & "\xF0";

      if ($leadBits == "\xC0" || $leadBits == "\xD0") {
        // 2-byte sequence: fits into one byte if the code point is below 256, otherwise "?"
        $c = (ord($str[$read] & "\x1F") << 6) | ord($str[++$read] & "\x3F");
        $str[$write] = $c < 256 ? self::chr_and_parse_int($c) : '?';
      } elseif ($leadBits == "\xF0" || $leadBits == "\xE0") {
        // 3- and 4-byte sequences have no single-byte form: write "?" and skip
        // the continuation bytes (a 4-byte sequence has one more of them)
        if ($leadBits == "\xF0") {
          ++$read;
        }
        $str[$write] = '?';
        $read += 2;
      } else {
        // everything else is copied as it is
        $str[$write] = $str[$read];
      }

      ++$read;
      ++$write;
    }

    // only the first $write bytes hold the result
    return (string)self::substr($str, 0, $write, '8BIT');
  }
7682
7683
  /**
   * Converts an ISO-8859-1 string into UTF-8.
   *
   * <p>
   * The conversion itself is done by the native "utf8_encode()"; if the result contains
   * the byte "\xC2", it is additionally passed through the "$CP1252_TO_UTF8" replacement table.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    // an empty input needs no conversion at all
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without the byte "\xC2" the table lookup below is skipped
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // the search / replace lists are derived from the table only on the first call
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $REPLACE_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
  }
7719
7720
  /**
   * Legacy alias which only forwards the input to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7733
7734
  /**
   * Provides the table of all known utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               The whitespace characters as values, keyed by the type of whitespace
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7750
7751
  /**
   * Cuts a string after a given number of words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep.</p>
   * @param string $strAddOn <p>Text appended in place of the removed part.</p>
   *
   * @return string <p>
   *                The input itself if it does not exceed the limit, otherwise the
   *                right-trimmed leading words followed by "$strAddOn". An empty string
   *                is returned for an empty input or a limit below 1.
   *                </p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $maxWords = (int)$limit;

    if ($maxWords < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 .. $maxWords "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $maxWords . '}/u';
    preg_match($pattern, $str, $found);

    // something was matched, but it does not cover the whole input => the input was too long
    if (
        isset($found[0])
        &&
        self::strlen($str) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
7787
7788
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>
   * The string is translated into an ASCII-only "mask" (one placeholder per character),
   * the native "wordwrap()" is applied to that mask and the resulting break positions
   * are transferred back to the original characters.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    // both the text and the break string have to be non-empty
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $mask  : "#" for an existing break, " " for a space, "?" for any other character
    // $chars : the real characters (and break strings), in the same order as the mask
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $segmentIndex => $segment) {

      // every segment except the first one was preceded by a break
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $mask = wordwrap($mask, $width, '#', $cut);

    // walk from one "#" in the wrapped mask to the next one
    for (
        $breakPos = self::strpos($mask, '#', 0);
        false !== $breakPos;
        $breakPos = self::strpos($mask, '#', $breakPos + 1)
    ) {
      // move all characters in front of the break into the result
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // The "#" either stands for an original break / a replaced space (which is dropped here),
      // or it was inserted by "$cut" inside a long word (then no character is consumed).
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7855
7856
  /**
   * Provides the list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7865
7866
}
7867