Completed
Push — master ( 7256af...924fd9 )
by Lars
02:30
created

UTF8::str_transliterate()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1

Importance

Changes 0
Metric Value
dl 0
loc 4
ccs 2
cts 2
cp 1
rs 10
c 0
b 0
f 0
cc 1
eloc 2
nc 1
nop 3
crap 1
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
   * Bom => Byte-Length
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * Every key is written with byte escapes so that the table does not depend on the encoding of this
   * source file and cannot lose non-printable bytes (NUL, U+FEFF) when the file is edited or copied.
   * The 'as "WINDOWS-1252"' keys are the BOM bytes decoded as WINDOWS-1252 and stored as UTF-8, which is
   * why their byte length is larger than the length of the raw BOM.
   *
   * The order matters for prefix checks: the 4-byte UTF-32 (LE) mark is listed before the 2-byte
   * UTF-16 (LE) mark it starts with.
   *
   * @var array
   */
  private static $BOM = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Creates an instance and makes sure the environment detection has been executed.
   *
   * The constructor does nothing except delegate to UTF8::checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
820
821
  /**
   * Fetch the single character found at a given position, similar to $str[1] but multi-byte aware.
   *
   * An empty input string or a negative position yields an empty string; otherwise the lookup is
   * delegated to UTF8::substr() with a length of one character.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return.</p>
   *
   * @return string <p>Single Multi-Byte character.</p>
   */
  public static function access($str, $pos)
  {
    $text = (string)$str;
    if ($text === '') {
      return '';
    }

    // the position is only converted once we know there is something to look at
    $index = (int)$pos;

    return $index < 0 ? '' : (string)self::substr($text, $index, 1);
  }
845
846
  /**
   * Make sure a string starts with the UTF-8 byte order mark.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    if (self::string_has_bom($str) !== false) {
      // a BOM was detected, nothing to prepend
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
   * Convert a string of binary digits ("0" / "1") back into the bytes it represents.
   *
   * The digits are read as one big-endian number: leading zero bits are dropped, the rest is
   * left-padded with zero bits to a whole number of bytes and then converted eight bits at a time.
   * Working byte by byte avoids the precision loss of base_convert() on long inputs and the nibble
   * mis-alignment of pack('H*') when the hexadecimal form has an odd number of digits.
   *
   * Characters other than "0" and "1" are ignored.
   *
   * @param mixed $bin 1|0
   *
   * @return string <p>The decoded bytes; an empty string for empty input and a single NUL byte if the
   *                input contains no "1" bit (the value zero).</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // keep the binary digits only and drop leading zero bits, they do not change the value
    $bits = ltrim((string)preg_replace('/[^01]/', '', (string)$bin), '0');

    if ($bits === '') {
      // the value zero has always been returned as one NUL byte
      return "\x00";
    }

    // align to full bytes by padding on the left (most significant side)
    $missingBits = (8 - strlen($bits) % 8) % 8;
    if ($missingBits > 0) {
      $bits = str_repeat('0', $missingBits) . $bits;
    }

    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= chr(bindec($octet));
    }

    return $bytes;
  }
879
880
  /**
   * Provides the three bytes of the UTF-8 Byte Order Mark (0xEF 0xBB 0xBF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8ByteOrderMark = "\xef\xbb\xbf";

    return $utf8ByteOrderMark;
  }
891
892
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
   * Detects once which UTF-8 related PHP features are available and stores the result in self::$SUPPORT.
   *
   * Recorded keys: "mbstring", "mbstring_func_overload", "iconv", "intl",
   * "intl__transliterator_list_ids", "intlChar" and "pcre_utf8". The marker key
   * "already_checked_via_portable_utf8" turns every later call into a no-op.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      // detection has already been done
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // the overload flag is only looked at if the constant exists at all
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (bool)(ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              function_exists('transliterator_list_ids') === true;

    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? transliterator_list_ids()
        : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per call signature. The cache key contains the PHP type of $code_point, so
   * that e.g. the integer 6 and the string "6" can never be served each other's result.
   *
   * If "\IntlChar" is available the conversion is delegated to it. Otherwise the UTF-8 bytes are built
   * manually, which only works for integers within the Unicode range (0 - 0x10FFFF); every other
   * value yields null.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // the type is part of the key: 6 and "6" stringify identically but are different requests
    $cacheKey = gettype($code_point) . '|' . $code_point . '|' . $encoding;

    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $str = \IntlChar::chr($code_point);

      if ($encoding !== 'UTF-8') {
        $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
      }

      $CHAR_CACHE[$cacheKey] = $str;

      return $str;
    }

    // check type and range of code_point, only if there is no support for "\IntlChar"
    // (failures are not stored: isset() could never find a cached null again)
    if (
        !is_int($code_point)
        ||
        $code_point < 0
        ||
        $code_point > 0x10FFFF
    ) {
      return null;
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1033
1034 1
  /**
   * Casts the given value to an integer and returns the single byte that chr() produces for it.
   *
   * @param int $int
   *
   * @return string
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1043
1044
  /**
   * Runs a callback over every character of a string.
   *
   * The string is broken up with self::split() and the resulting pieces
   * are passed, one by one, to the callback via array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The callback results, one entry per piece returned by self::split().</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1058
1059 4
  /**
   * Lists the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The "8BIT" length of each piece returned by self::split(),
   *               or an empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $sizes = array();

    // keep the keys produced by self::split(), exactly as array_map() would
    foreach (self::split($str) as $position => $character) {
      $sizes[$position] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
1086
1087 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes belong to the sequence (1 - 4);
   * the payload bits of the lead byte and of each following byte are then
   * combined into one integer.
   *
   * Edge cases:
   * - an empty string yields 0 (there is no lead byte to inspect),
   * - a lead byte that matches none of the 2/3/4-byte patterns
   *   (e.g. a stray continuation byte) is returned as its raw ord() value,
   * - bytes missing from a truncated sequence contribute zero bits
   *   instead of triggering an "Uninitialized string offset" error.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decimal code of the first character in $char.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // guard: reading $char[0] on an empty string raises a notice / warning
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // guard: a truncated sequence has fewer bytes than the lead byte announced
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1126
1127 1
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * An empty input produces an empty string. The literal entity "&#0;"
   * is replaced by an empty string before it is passed to self::ord().
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>The prefix handed to self::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $input = (string)$char;

    if ($input === '') {
      return '';
    }

    $target = ($input === '&#0;') ? '' : $input;
    $codePoint = self::ord($target);

    return self::int_to_hex($codePoint, $pfix);
  }
1149
1150
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for $chr.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1163
1164
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The chunks come from self::split() and are glued together with implode(),
   * so $end appears between chunks only; nothing is appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1177
1178
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order:
   * 1. keep only byte runs that look like 1 - 4 byte UTF-8 sequences,
   * 2. strip the diamond question mark via self::replace_diamond_question_mark(),
   * 3. strip invisible characters via self::remove_invisible_characters(),
   * 4. optionally normalize whitespace, MS Word characters and remove the BOM.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 captures well-formed runs and is kept by the '$1' replacement;
    // a lone byte matched by group 2 or 3 is replaced by the (empty) group 1
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($utf8Pattern, '$1', $str);

    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark($str, '')
    );

    if ($normalize_whitespace === true) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $str = self::normalize_msword($str);
    }

    if ($remove_bom === true) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1226
1227
  /**
   * Cleans up a string so that only printable UTF-8 chars remain + fixes the UTF-8 encoding.
   *
   * The input first goes through self::fix_simple_utf8() and is then handed to
   * self::clean() with BOM removal and whitespace normalization switched on,
   * MS Word normalization switched off and non-breaking spaces kept.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, or an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // remove all none UTF-8 symbols
    // && remove diamond question mark (�)
    // && remove invisible characters (e.g. "\0")
    // && remove BOM
    // && normalize whitespace chars (but keep non-breaking-spaces)
    return (string)self::clean($fixed, true, true, false, true);
  }
1254 7
1255 7
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * A string argument is split with self::split() first; an array argument is
   * used as given. Every element is then mapped through UTF8::ord() and,
   * if requested, through UTF8::int_to_hex().
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = (is_string($arg) === true) ? self::split($arg) : $arg;

    $class = '\\voku\\helper\\UTF8';
    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    return array_map(array($class, 'int_to_hex'), $points);
  }
1292
1293
  /**
   * Returns count of characters used in a string.
   *
   * The string is split into pieces of length 1 via self::split() and the
   * pieces are tallied with array_count_values().
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1306
1307
  /**
   * Converts an int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#<int>;") which is
   * then decoded by self::html_entity_decode().
   *
   * @param mixed $int <p>The decimal value to convert.</p>
   *
   * @return string <p>The result of decoding the numeric entity.</p>
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1324 5
1325
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * The input is returned unchanged when the string or the encoding is empty,
   * when the current encoding cannot be detected, when (without $force) the
   * detected encoding already equals the target, or when the conversion
   * yields an empty / falsy result.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // nothing to convert: detection failed, or (unforced) already in the target encoding
    if (
        $detected === false
        ||
        (
            $force !== true
            &&
            $detected === $encoding
        )
    ) {
      return $str;
    }

    // the dedicated UTF-8 / ISO-8859-1 converters are used when forced,
    // or when the detected encoding is one of these three
    $isDirectlyConvertible = (
        $force === true
        ||
        in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
    );

    if ($encoding === 'UTF-8' && $isDirectlyConvertible) {
      return self::to_utf8($str);
    }

    if ($encoding === 'ISO-8859-1' && $isDirectlyConvertible) {
      return self::to_iso8859($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // a falsy conversion result falls back to the untouched input
    return $converted ? $converted : $str;
  }
1416
1417
  /**
1418
   * Reads entire file into a string.
1419
   *
1420
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1421
   *
1422
   * @link http://php.net/manual/en/function.file-get-contents.php
1423
   *
1424
   * @param string        $filename      <p>
1425
   *                                     Name of the file to read.
1426
   *                                     </p>
1427
   * @param int|false     $flags         [optional] <p>
1428
   *                                     Prior to PHP 6, this parameter is called
1429
   *                                     use_include_path and is a bool.
1430
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1431
   *                                     to trigger include path
1432
   *                                     search.
1433
   *                                     </p>
1434
   *                                     <p>
1435
   *                                     The value of flags can be any combination of
1436
   *                                     the following flags (with some restrictions), joined with the
1437
   *                                     binary OR (|)
1438
   *                                     operator.
1439
   *                                     </p>
1440
   *                                     <p>
1441
   *                                     <table>
1442
   *                                     Available flags
1443
   *                                     <tr valign="top">
1444
   *                                     <td>Flag</td>
1445
   *                                     <td>Description</td>
1446
   *                                     </tr>
1447
   *                                     <tr valign="top">
1448
   *                                     <td>
1449
   *                                     FILE_USE_INCLUDE_PATH
1450
   *                                     </td>
1451
   *                                     <td>
1452
   *                                     Search for filename in the include directory.
1453
   *                                     See include_path for more
1454
   *                                     information.
1455
   *                                     </td>
1456
   *                                     </tr>
1457
   *                                     <tr valign="top">
1458
   *                                     <td>
1459
   *                                     FILE_TEXT
1460
   *                                     </td>
1461
   *                                     <td>
1462
   *                                     As of PHP 6, the default encoding of the read
1463
   *                                     data is UTF-8. You can specify a different encoding by creating a
1464
   *                                     custom context or by changing the default using
1465
   *                                     stream_default_encoding. This flag cannot be
1466
   *                                     used with FILE_BINARY.
1467
   *                                     </td>
1468
   *                                     </tr>
1469
   *                                     <tr valign="top">
1470
   *                                     <td>
1471
   *                                     FILE_BINARY
1472
   *                                     </td>
1473
   *                                     <td>
1474
   *                                     With this flag, the file is read in binary mode. This is the default
1475
   *                                     setting and cannot be used with FILE_TEXT.
1476
   *                                     </td>
1477
   *                                     </tr>
1478 3
   *                                     </table>
1479
   *                                     </p>
1480
   * @param resource|null $context       [optional] <p>
1481 3
   *                                     A valid context resource created with
1482 3
   *                                     stream_context_create. If you don't need to use a
1483
   *                                     custom context, you can skip this parameter by passing null.
1484 3
   *                                     </p>
1485 2
   * @param int|null $offset             [optional] <p>
1486
   *                                     The offset where the reading starts.
1487
   *                                     </p>
1488
   * @param int|null $maxLength          [optional] <p>
1489 2
   *                                     Maximum length of data read. The default is to read until end
1490 2
   *                                     of file is reached.
1491
   *                                     </p>
1492 2
   * @param int      $timeout            <p>The time in seconds for the timeout.</p>
1493 2
   *
1494
   * @param boolean  $convertToUtf8      <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1495 3
   *                                     or pdf, because they used non default utf-8 chars</p>
1496 3
   *
1497 3
   * @return string|false <p>The function returns the read data or false on failure.</p>
1498
   */
1499 3
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 and rewrites
    // quotes / tag-like parts of the name -- confirm this sanitizing is still wanted.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // build a default http context carrying the timeout, unless the caller brought a context
    if ($context === null && $timeout) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    // any falsy $flags value is passed on as a plain false
    $flags = $flags ? $flags : false;
    $offset = ($offset === null) ? 0 : $offset;

    // the length argument is only forwarded when it is a real integer
    $data = (is_int($maxLength) === true)
        ? file_get_contents($filename, $flags, $context, $offset, $maxLength)
        : file_get_contents($filename, $flags, $context, $offset);

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    return self::cleanup(self::encode('UTF-8', $data, false));
  }
1542
1543 9
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read with file_get_contents() and the content is
   * checked by self::string_has_bom(). A file that cannot be read is
   * reported as having no BOM.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (including when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // a failed read yields false; do not pass that on as if it were file content
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1554 2
1555 2
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * - arrays are filtered element by element (recursively),
   * - objects get each of their iterable properties filtered in place (recursively),
   * - strings get "\r\n" / "\r" replaced by "\n" and, if they are not pure ASCII,
   *   are normalized via \Normalizer (falling back to self::encode() when
   *   normalization yields nothing); a leading combining mark is prefixed
   *   with $leading_combining,
   * - every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form passed to \Normalizer.</p>
   * @param string $leading_combining  <p>Prefix for strings that start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        $var[$k] = self::filter($v, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $k => $v) {
        $var->{$k} = self::filter($v, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // marker value: only its non-emptiness matters in the check below
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = (
        $var[0] >= "\x80"
        &&
        isset($n[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    );

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1618
1619
  /**
1620
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1621
   *
1622
   * Gets a specific external variable by name and optionally filters it
1623
   *
1624
   * @link  http://php.net/manual/en/function.filter-input.php
1625
   *
1626
   * @param int    $type          <p>
1627
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1628
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1629
   *                              <b>INPUT_ENV</b>.
1630
   *                              </p>
1631
   * @param string $variable_name <p>
1632
   *                              Name of a variable to get.
1633
   *                              </p>
1634
   * @param int    $filter        [optional] <p>
1635
   *                              The ID of the filter to apply. The
1636
   *                              manual page lists the available filters.
1637
   *                              </p>
1638
   * @param mixed  $options       [optional] <p>
1639
   *                              Associative array of options or bitwise disjunction of flags. If filter
1640
   *                              accepts options, flags can be provided in "flags" field of array.
1641
   *                              </p>
1642
   *
1643
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1644
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1645
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1646
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1647
   * @since 5.2.0
1648
   */
1649 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller really supplied a fourth argument
    $var = (4 > func_num_args())
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1659
1660
  /**
1661
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1662
   *
1663
   * Gets external variables and optionally filters them
1664
   *
1665
   * @link  http://php.net/manual/en/function.filter-input-array.php
1666
   *
1667
   * @param int   $type       <p>
1668
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1669
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1670
   *                          <b>INPUT_ENV</b>.
1671
   *                          </p>
1672
   * @param mixed $definition [optional] <p>
1673
   *                          An array defining the arguments. A valid key is a string
1674
   *                          containing a variable name and a valid value is either a filter type, or an array
1675
   *                          optionally specifying the filter, flags and options. If the value is an
1676
   *                          array, valid keys are filter which specifies the
1677
   *                          filter type,
1678
   *                          flags which specifies any flags that apply to the
1679
   *                          filter, and options which specifies any options that
1680
   *                          apply to the filter. See the example below for a better understanding.
1681
   *                          </p>
1682
   *                          <p>
1683
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1684
   *                          input array are filtered by this filter.
1685
   *                          </p>
1686
   * @param bool  $add_empty  [optional] <p>
1687
   *                          Add missing keys as <b>NULL</b> to the return value.
1688
   *                          </p>
1689
   *
1690
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1691
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1692
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1693
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1694
   * fails.
1695
   * @since 5.2.0
1696
   */
1697 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // called with the type only: let the native function use its own defaults
    if (2 > func_num_args()) {
      return self::filter(filter_input_array($type));
    }

    return self::filter(filter_input_array($type, $definition, $add_empty));
  }
1707
1708
  /**
1709
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1710
   *
1711
   * Filters a variable with a specified filter
1712
   *
1713
   * @link  http://php.net/manual/en/function.filter-var.php
1714
   *
1715
   * @param mixed $variable <p>
1716
   *                        Value to filter.
1717
   *                        </p>
1718
   * @param int   $filter   [optional] <p>
1719
   *                        The ID of the filter to apply. The
1720
   *                        manual page lists the available filters.
1721
   *                        </p>
1722
   * @param mixed $options  [optional] <p>
1723
   *                        Associative array of options or bitwise disjunction of flags. If filter
1724
   *                        accepts options, flags can be provided in "flags" field of array. For
1725
   *                        the "callback" filter, callable type should be passed. The
1726
   *                        callback must accept one argument, the value to be filtered, and return
1727
   *                        the value after filtering/sanitizing it.
1728
   *                        </p>
1729
   *                        <p>
1730
   *                        <code>
1731
   *                        // for filters that accept options, use this format
1732
   *                        $options = array(
1733
   *                        'options' => array(
1734
   *                        'default' => 3, // value to return if the filter fails
1735
   *                        // other options here
1736
   *                        'min_range' => 0
1737
   *                        ),
1738
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1739
   *                        );
1740
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1741
   *                        // for filter that only accept flags, you can pass them directly
1742
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1743
   *                        // for filter that only accept flags, you can also pass as an array
1744
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1745
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1746 1
   *                        // callback validate filter
1747
   *                        function foo($value)
1748 1
   *                        {
1749 1
   *                        // Expected format: Surname, GivenNames
1750 1
   *                        if (strpos($value, ", ") === false) return false;
1751 1
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1752
   *                        $empty = (empty($surname) || empty($givennames));
1753
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1754 1
   *                        if ($empty || $notstrings) {
1755
   *                        return false;
1756
   *                        } else {
1757
   *                        return $value;
1758
   *                        }
1759
   *                        }
1760
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1761
   *                        </code>
1762
   *                        </p>
1763
   *
1764
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1765
   * @since 5.2.0
1766
   */
1767 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller really supplied a third argument;
    // otherwise the native function is called with two arguments only.
    $filtered = func_num_args() >= 3
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // The native result is post-processed by self::filter() before it is returned.
    return self::filter($filtered);
  }
1777
1778
  /**
1779
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1780
   *
1781
   * Gets multiple variables and optionally filters them
1782
   *
1783
   * @link  http://php.net/manual/en/function.filter-var-array.php
1784
   *
1785
   * @param array $data       <p>
1786
   *                          An array with string keys containing the data to filter.
1787
   *                          </p>
1788
   * @param mixed $definition [optional] <p>
1789
   *                          An array defining the arguments. A valid key is a string
1790
   *                          containing a variable name and a valid value is either a
1791 1
   *                          filter type, or an
1792
   *                          array optionally specifying the filter, flags and options.
1793 1
   *                          If the value is an array, valid keys are filter
1794 1
   *                          which specifies the filter type,
1795 1
   *                          flags which specifies any flags that apply to the
1796 1
   *                          filter, and options which specifies any options that
1797
   *                          apply to the filter. See the example below for a better understanding.
1798
   *                          </p>
1799 1
   *                          <p>
1800
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1801
   *                          input array are filtered by this filter.
1802
   *                          </p>
1803
   * @param bool  $add_empty  [optional] <p>
1804
   *                          Add missing keys as <b>NULL</b> to the return value.
1805
   *                          </p>
1806
   *
1807
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1808
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1809
   * the variable is not set.
1810 1
   * @since 5.2.0
1811
   */
1812 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument only $data is handed to the native function;
    // with two or more, all three values are forwarded.
    $filtered = func_num_args() >= 2
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    // The native result is post-processed by self::filter() before it is returned.
    return self::filter($filtered);
  }
1822
1823
  /**
   * Check whether a string has at most the given number of characters.
   *
   * @param string $str      The string to be measured.
   * @param int    $box_size The maximum number of characters allowed.
   *
   * @return bool true if the length reported by UTF8::strlen() is less than or equal to $box_size, false otherwise.
   */
  public static function fits_inside($str, $box_size)
  {
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1835
1836
  /**
   * Try to fix simple broken UTF-8 strings.
   *
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
   *
   * Meant for UTF-8 strings that were converted from Windows-1252 as if they were ISO-8859-1
   * (ignoring the Windows-1252 chars from 80 to 9F).
   * See: http://en.wikipedia.org/wiki/Windows-1252
   *
   * Every key of self::$BROKEN_UTF8_FIX found in the input is replaced by its mapped value.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The input with all known broken sequences replaced, '' for empty input.</p>
   */
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // The replacement table is split into search/replace lists on the first
    // call only; later calls reuse the cached pair.
    static $replacementTable = null;

    if ($replacementTable === null) {
      $replacementTable = array(
          'search'  => array_keys(self::$BROKEN_UTF8_FIX),
          'replace' => array_values(self::$BROKEN_UTF8_FIX),
      );
    }

    return str_replace($replacementTable['search'], $replacementTable['replace'], $str);
  }
1868
1869
  /**
   * Fix a double (or multiple) encoded UTF8 string.
   *
   * Arrays are processed element by element (recursively, keys are kept). A single value is
   * passed through UTF8::utf8_decode() and UTF8::to_utf8() repeatedly until one more pass
   * no longer changes it.
   *
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
   *
   * @return string|string[] <p>Will return the fixed input-"array" or
   *                         the fixed input-"string".</p>
   */
  public static function fix_utf8($str)
  {
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Starting from '' means an empty string is returned without any pass at all.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;

      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1901
1902
  /**
   * Get the text direction of a specific character.
   *
   * When self::$SUPPORT['intlChar'] is true, \IntlChar::charDirection() is asked first. If it
   * does not report one of the direction classes handled below (or IntlChar is not used),
   * the code point is looked up in a built-in list of right-to-left code points.
   *
   * @param string $char
   *
   * @return string <p>'RTL' or 'LTR'</p>
   */
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $icuDirection = \IntlChar::charDirection($char);

      // direction classes as numbered by the "IntlChar"-Class
      if (in_array($icuDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($icuDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }
    }

    $codePoint = static::chr_to_decimal($char);

    // Everything outside of the span covered by the list is left-to-right.
    if (!($codePoint >= 0x5be && $codePoint <= 0x10b7f)) {
      return 'LTR';
    }

    // Right-to-left code points: a plain integer is a single code point (compared
    // strictly), an array is an inclusive "from, to" range.
    static $rtlCodePoints = array(
        0x5be,
        0x5c0,
        0x5c3,
        0x5c6,
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        0x608,
        0x60b,
        0x60d,
        0x61b,
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        0x710,
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        0x7b1,
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        0x7fa,
        array(0x800, 0x815),
        0x81a,
        0x824,
        0x828,
        array(0x830, 0x83e),
        array(0x840, 0x858),
        0x85e,
        0x200f,
        0xfb1d,
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        0xfb3e,
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        0x10808,
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        0x1083c,
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        0x1093f,
        0x10a00,
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    foreach ($rtlCodePoints as $entry) {
      if (is_array($entry)) {
        if ($codePoint >= $entry[0] && $codePoint <= $entry[1]) {
          return 'RTL';
        }
      } elseif ($entry === $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2023
2024
  /**
   * Get data from "/data/*.php".
   *
   * The file is loaded via "require", so the result is whatever that PHP file returns.
   *
   * @param string $file <p>Name of the data file, without the directory and without the ".php" extension.</p>
   *
   * @return bool|string|array|int <p>Will return false if the file does not exist.</p>
   */
  private static function getData($file)
  {
    $path = __DIR__ . '/data/' . $file . '.php';

    // is_file() instead of file_exists(): a directory of that name must never be handed to "require".
    if (!is_file($path)) {
      return false;
    }

    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2041
2042
  /**
   * Check for php-support.
   *
   * @param string|null $key
   *
   * @return mixed <p>Return the full support-"array", if $key === null<br>
   *               return bool-value, if $key is used and available<br>
   *               otherwise return null</p>
   */
  public static function getSupportInfo($key = null)
  {
    // Run the support check first if it has not been flagged as done yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($key === null) {
      return self::$SUPPORT;
    }

    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2067
2068
  /**
   * Alias for "UTF8::string_has_bom()": the call is forwarded unchanged.
   *
   * @see UTF8::string_has_bom()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
   */
  public static function hasBom($str)
  {
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2083
2084 1
  /**
   * Converts a hexadecimal-value into an UTF-8 character.
   *
   * The value is converted with hexdec() and the resulting number is passed to
   * UTF8::decimal_to_chr().
   *
   * @param string $hexdec <p>The hexadecimal value.</p>
   *
   * @return string|false <p>One single UTF-8 character.</p>
   */
  public static function hex_to_chr($hexdec)
  {
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2095
2096 1
  /**
   * Converts hexadecimal U+xxxx code point representation to integer.
   *
   * INFO: opposite to UTF8::int_to_hex()
   *
   * Accepted forms (case-insensitive): 4 to 6 hexadecimal digits, optionally
   * prefixed with "\u" or "U+", e.g. "00a7", "\u00a7" or "U+00A7".
   *
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
   *
   * @return int|false <p>The code point, or false on failure.</p>
   */
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    if ($hexDec === '') {
      return false;
    }

    // Only real hexadecimal digits are accepted: with a wider character class
    // (e.g. "zzzz") intval() would silently yield 0 instead of reporting a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2119
2120
  /**
   * Alias for "UTF8::html_entity_decode()": all arguments are forwarded unchanged.
   *
   * @see UTF8::html_entity_decode()
   *
   * @param string $str
   * @param int    $flags
   * @param string $encoding
   *
   * @return string
   */
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2135 2
2136 1
  /**
   * Converts a UTF-8 string to a series of HTML numbered entities.
   *
   * INFO: opposite to UTF8::html_decode()
   *
   * Uses mb_encode_numericentity() when that function exists; otherwise every character
   * returned by UTF8::split() is encoded via UTF8::single_chr_html_encode().
   *
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>HTML numbered entities.</p>
   */
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // With $keepAsciiChars the conversion starts above the ASCII range.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap entry consists of exactly four values: start, end, offset, mask.
      // (PHP 8 rejects maps whose size is not a multiple of four.) End and mask
      // cover the complete Unicode range up to U+10FFFF.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0x10ffff, 0, 0x1fffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2185
2186
  /**
2187
   * UTF-8 version of html_entity_decode()
2188
   *
2189
   * The reason we are not using html_entity_decode() by itself is because
2190
   * while it is not technically correct to leave out the semicolon
2191
   * at the end of an entity most browsers will still interpret the entity
2192
   * correctly. html_entity_decode() does not convert entities without
2193
   * semicolons, so we are left with our own little solution here. Bummer.
2194
   *
2195
   * Convert all HTML entities to their applicable characters
2196
   *
2197
   * INFO: opposite to UTF8::html_encode()
2198
   *
2199
   * @link http://php.net/manual/en/function.html-entity-decode.php
2200
   *
2201
   * @param string $str      <p>
2202
   *                         The input string.
2203
   *                         </p>
2204
   * @param int    $flags    [optional] <p>
2205
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2206
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2207
   *                         <table>
2208
   *                         Available <i>flags</i> constants
2209
   *                         <tr valign="top">
2210
   *                         <td>Constant Name</td>
2211
   *                         <td>Description</td>
2212
   *                         </tr>
2213
   *                         <tr valign="top">
2214
   *                         <td><b>ENT_COMPAT</b></td>
2215
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2216
   *                         </tr>
2217
   *                         <tr valign="top">
2218
   *                         <td><b>ENT_QUOTES</b></td>
2219
   *                         <td>Will convert both double and single quotes.</td>
2220
   *                         </tr>
2221
   *                         <tr valign="top">
2222
   *                         <td><b>ENT_NOQUOTES</b></td>
2223
   *                         <td>Will leave both double and single quotes unconverted.</td>
2224
   *                         </tr>
2225
   *                         <tr valign="top">
2226
   *                         <td><b>ENT_HTML401</b></td>
2227
   *                         <td>
2228
   *                         Handle code as HTML 4.01.
2229
   *                         </td>
2230
   *                         </tr>
2231
   *                         <tr valign="top">
2232
   *                         <td><b>ENT_XML1</b></td>
2233
   *                         <td>
2234 16
   *                         Handle code as XML 1.
2235
   *                         </td>
2236
   *                         </tr>
2237 16
   *                         <tr valign="top">
2238
   *                         <td><b>ENT_XHTML</b></td>
2239 16
   *                         <td>
2240 5
   *                         Handle code as XHTML.
2241
   *                         </td>
2242
   *                         </tr>
2243 16
   *                         <tr valign="top">
2244 9
   *                         <td><b>ENT_HTML5</b></td>
2245
   *                         <td>
2246
   *                         Handle code as HTML 5.
2247
   *                         </td>
2248 15
   *                         </tr>
2249 15
   *                         </table>
2250
   *                         </p>
2251 15
   * @param string $encoding [optional] <p>Encoding to use.</p>
2252 15
   *
2253 9
   * @return string <p>The decoded string.</p>
2254 9
   */
2255 15
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Strings with fewer than four bytes are returned as they are.
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Nothing to do without an ampersand, or when neither "&#" nor ";" occurs.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    // self::$SUPPORT is read below, so run the support check first if it has
    // not been flagged as done yet (same guard as in the other methods).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Repeat until a full pass leaves the string unchanged, so that multiply
    // encoded entities are resolved as well.
    do {
      $str_compare = $str;

      // Decimal entities are converted via mbstring; quotes are kept as entities
      // in this step and are left to html_entity_decode() and $flags below.
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities first)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2330
2331
  /**
2332
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2333
   *
2334
   * @link http://php.net/manual/en/function.htmlentities.php
2335
   *
2336
   * @param string $str           <p>
2337
   *                              The input string.
2338
   *                              </p>
2339
   * @param int    $flags         [optional] <p>
2340
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2341
   *                              invalid code unit sequences and the used document type. The default is
2342
   *                              ENT_COMPAT | ENT_HTML401.
2343
   *                              <table>
2344
   *                              Available <i>flags</i> constants
2345
   *                              <tr valign="top">
2346
   *                              <td>Constant Name</td>
2347
   *                              <td>Description</td>
2348
   *                              </tr>
2349
   *                              <tr valign="top">
2350
   *                              <td><b>ENT_COMPAT</b></td>
2351
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2352
   *                              </tr>
2353
   *                              <tr valign="top">
2354
   *                              <td><b>ENT_QUOTES</b></td>
2355
   *                              <td>Will convert both double and single quotes.</td>
2356
   *                              </tr>
2357
   *                              <tr valign="top">
2358
   *                              <td><b>ENT_NOQUOTES</b></td>
2359
   *                              <td>Will leave both double and single quotes unconverted.</td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_IGNORE</b></td>
2363
   *                              <td>
2364
   *                              Silently discard invalid code unit sequences instead of returning
2365
   *                              an empty string. Using this flag is discouraged as it
2366
   *                              may have security implications.
2367
   *                              </td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2371
   *                              <td>
2372
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2373
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2374
   *                              </td>
2375
   *                              </tr>
2376
   *                              <tr valign="top">
2377
   *                              <td><b>ENT_DISALLOWED</b></td>
2378
   *                              <td>
2379
   *                              Replace invalid code points for the given document type with a
2380
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2381
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2382
   *                              instance, to ensure the well-formedness of XML documents with
2383
   *                              embedded external content.
2384
   *                              </td>
2385
   *                              </tr>
2386
   *                              <tr valign="top">
2387
   *                              <td><b>ENT_HTML401</b></td>
2388
   *                              <td>
2389
   *                              Handle code as HTML 4.01.
2390
   *                              </td>
2391
   *                              </tr>
2392
   *                              <tr valign="top">
2393
   *                              <td><b>ENT_XML1</b></td>
2394
   *                              <td>
2395
   *                              Handle code as XML 1.
2396
   *                              </td>
2397
   *                              </tr>
2398
   *                              <tr valign="top">
2399
   *                              <td><b>ENT_XHTML</b></td>
2400
   *                              <td>
2401
   *                              Handle code as XHTML.
2402
   *                              </td>
2403 2
   *                              </tr>
2404
   *                              <tr valign="top">
2405 2
   *                              <td><b>ENT_HTML5</b></td>
2406 1
   *                              <td>
2407 1
   *                              Handle code as HTML 5.
2408
   *                              </td>
2409 2
   *                              </tr>
2410
   *                              </table>
2411
   *                              </p>
2412
   * @param string $encoding      [optional] <p>
2413
   *                              Like <b>htmlspecialchars</b>,
2414
   *                              <b>htmlentities</b> takes an optional third argument
2415
   *                              <i>encoding</i> which defines encoding used in
2416
   *                              conversion.
2417
   *                              Although this argument is technically optional, you are highly
2418
   *                              encouraged to specify the correct value for your code.
2419 2
   *                              </p>
2420
   * @param bool   $double_encode [optional] <p>
2421 2
   *                              When <i>double_encode</i> is turned off PHP will not
2422 1
   *                              encode existing html entities. The default is to convert everything.
2423
   *                              </p>
2424
   *
2425 2
   *
2426 2
   * @return string the encoded string.
2427 2
   * </p>
2428 2
   * <p>
2429 2
   * If the input <i>string</i> contains an invalid code unit
2430 1
   * sequence within the given <i>encoding</i> an empty string
2431
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2432 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2433 1
   */
2434 1
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native function does not turn a backslash into an html entity, so
     * every backslash is replaced by its numeric entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Characters of three or more bytes are additionally replaced by the
    // numbered entity produced by self::html_encode(); each distinct
    // character is looked up only once.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($encoded) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($encoded, $position);
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $encoded);
  }
2472
2473
  /**
2474
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2475
   *
2476
   * INFO: Take a look at "UTF8::htmlentities()"
2477
   *
2478
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2479
   *
2480
   * @param string $str           <p>
2481
   *                              The string being converted.
2482
   *                              </p>
2483
   * @param int    $flags         [optional] <p>
2484
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2485
   *                              invalid code unit sequences and the used document type. The default is
2486
   *                              ENT_COMPAT | ENT_HTML401.
2487
   *                              <table>
2488
   *                              Available <i>flags</i> constants
2489
   *                              <tr valign="top">
2490
   *                              <td>Constant Name</td>
2491
   *                              <td>Description</td>
2492
   *                              </tr>
2493
   *                              <tr valign="top">
2494
   *                              <td><b>ENT_COMPAT</b></td>
2495
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2496
   *                              </tr>
2497
   *                              <tr valign="top">
2498
   *                              <td><b>ENT_QUOTES</b></td>
2499
   *                              <td>Will convert both double and single quotes.</td>
2500
   *                              </tr>
2501
   *                              <tr valign="top">
2502
   *                              <td><b>ENT_NOQUOTES</b></td>
2503
   *                              <td>Will leave both double and single quotes unconverted.</td>
2504
   *                              </tr>
2505
   *                              <tr valign="top">
2506
   *                              <td><b>ENT_IGNORE</b></td>
2507
   *                              <td>
2508
   *                              Silently discard invalid code unit sequences instead of returning
2509
   *                              an empty string. Using this flag is discouraged as it
2510
   *                              may have security implications.
2511
   *                              </td>
2512
   *                              </tr>
2513
   *                              <tr valign="top">
2514
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2515
   *                              <td>
2516
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2517
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2518
   *                              </td>
2519
   *                              </tr>
2520
   *                              <tr valign="top">
2521
   *                              <td><b>ENT_DISALLOWED</b></td>
2522
   *                              <td>
2523
   *                              Replace invalid code points for the given document type with a
2524
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2525
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2526
   *                              instance, to ensure the well-formedness of XML documents with
2527
   *                              embedded external content.
2528
   *                              </td>
2529
   *                              </tr>
2530
   *                              <tr valign="top">
2531
   *                              <td><b>ENT_HTML401</b></td>
2532
   *                              <td>
2533
   *                              Handle code as HTML 4.01.
2534
   *                              </td>
2535
   *                              </tr>
2536
   *                              <tr valign="top">
2537
   *                              <td><b>ENT_XML1</b></td>
2538
   *                              <td>
2539
   *                              Handle code as XML 1.
2540
   *                              </td>
2541
   *                              </tr>
2542
   *                              <tr valign="top">
2543
   *                              <td><b>ENT_XHTML</b></td>
2544
   *                              <td>
2545
   *                              Handle code as XHTML.
2546
   *                              </td>
2547
   *                              </tr>
2548
   *                              <tr valign="top">
2549
   *                              <td><b>ENT_HTML5</b></td>
2550
   *                              <td>
2551 1
   *                              Handle code as HTML 5.
2552
   *                              </td>
2553 1
   *                              </tr>
2554 1
   *                              </table>
2555 1
   *                              </p>
2556
   * @param string $encoding      [optional] <p>
2557 1
   *                              Defines encoding used in conversion.
2558
   *                              </p>
2559
   *                              <p>
2560
   *                              For the purposes of this function, the encodings
2561
   *                              ISO-8859-1, ISO-8859-15,
2562
   *                              UTF-8, cp866,
2563
   *                              cp1251, cp1252, and
2564
   *                              KOI8-R are effectively equivalent, provided the
2565 1
   *                              <i>string</i> itself is valid for the encoding, as
2566
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2567 1
   *                              the same positions in all of these encodings.
2568
   *                              </p>
2569
   * @param bool   $double_encode [optional] <p>
2570
   *                              When <i>double_encode</i> is turned off PHP will not
2571 1
   *                              encode existing html entities, the default is to convert everything.
2572
   *                              </p>
2573 1
   *
2574 1
   * @return string The converted string.
2575 1
   * </p>
2576 1
   * <p>
2577
   * If the input <i>string</i> contains an invalid code unit
2578 1
   * sequence within the given <i>encoding</i> an empty string
2579
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2580
   * <b>ENT_SUBSTITUTE</b> flags are set.
2581
   */
2582
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The literal "UTF-8" is used as-is; every other label is first passed through
    // "normalize_encoding()" (called with "UTF-8" as its second argument).
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // Delegate the actual escaping to the native function.
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2590 2
2591
  /**
2592 2
   * Checks whether iconv is available on the server.
2593
   *
2594
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2595
   */
2596
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only configure iconv when the extension is really present: calling
    // "iconv_set_encoding()" without ext/iconv is a fatal "undefined function"
    // error, which would make this availability check itself crash.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2611
2612 3
  /**
2613
   * alias for "UTF8::decimal_to_chr()"
2614
   *
2615 1
   * @see UTF8::decimal_to_chr()
2616
   *
2617
   * @param mixed $int
2618
   *
2619
   * @return string
2620
   */
2621
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself is done by "decimal_to_chr()".
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2625
2626 1
  /**
2627 1
   * Converts Integer to hexadecimal U+xxxx code point representation.
2628
   *
2629 1
   * INFO: opposite to UTF8::hex_to_int()
2630
   *
2631
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2632
   * @param string $pfix [optional]
2633
   *
2634
   * @return string <p>The code point, or empty string on failure.</p>
2635
   */
2636
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not already a genuine integer (e.g. a numeric string) is rejected.
    if ((int)$int !== $int) {
      return '';
    }

    $digits = dechex($int);

    // Left-pad with zeros so that at least four hex digits are produced.
    if (strlen($digits) < 4) {
      $digits = substr('0000' . $digits, -4);
    }

    return $pfix . $digits;
  }
2648
2649
  /**
2650
   * Checks whether intl-char is available on the server.
2651
   *
2652
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2653
   */
2654
  public static function intlChar_loaded()
  {
    // The class lookup is only attempted once the PHP 7.0 version check has passed.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2662
2663
  /**
2664
   * Checks whether intl is available on the server.
2665
   *
2666
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2667
   */
2668
  public static function intl_loaded()
  {
    // "extension_loaded()" already answers with a boolean; normalise it explicitly.
    $hasIntl = extension_loaded('intl');

    return $hasIntl === true;
  }
2672
2673
  /**
2674
   * alias for "UTF8::is_ascii()"
2675
   *
2676
   * @see UTF8::is_ascii()
2677
   *
2678
   * @param string $str
2679
   *
2680
   * @return boolean
2681
   *
2682
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2683
   */
2684
  public static function isAscii($str)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_ascii()".
    $result = self::is_ascii($str);

    return $result;
  }
2688
2689
  /**
2690
   * alias for "UTF8::is_base64()"
2691
   *
2692
   * @see UTF8::is_base64()
2693
   *
2694
   * @param string $str
2695
   *
2696
   * @return bool
2697
   *
2698
   * @deprecated <p>use "UTF8::is_base64()"</p>
2699
   */
2700
  public static function isBase64($str)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_base64()".
    $result = self::is_base64($str);

    return $result;
  }
2704
2705
  /**
2706
   * alias for "UTF8::is_binary()"
2707
   *
2708
   * @see UTF8::is_binary()
2709
   *
2710
   * @param string $str
2711
   *
2712
   * @return bool
2713
   *
2714
   * @deprecated <p>use "UTF8::is_binary()"</p>
2715
   */
2716
  public static function isBinary($str)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_binary()".
    $result = self::is_binary($str);

    return $result;
  }
2720
2721
  /**
2722
   * alias for "UTF8::is_bom()"
2723
   *
2724
   * @see UTF8::is_bom()
2725
   *
2726
   * @param string $utf8_chr
2727
   *
2728
   * @return boolean
2729
   *
2730
   * @deprecated <p>use "UTF8::is_bom()"</p>
2731
   */
2732
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_bom()".
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2736
2737
  /**
2738
   * alias for "UTF8::is_html()"
2739
   *
2740
   * @see UTF8::is_html()
2741
   *
2742
   * @param string $str
2743
   *
2744
   * @return boolean
2745
   *
2746
   * @deprecated <p>use "UTF8::is_html()"</p>
2747
   */
2748
  public static function isHtml($str)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_html()".
    $result = self::is_html($str);

    return $result;
  }
2752
2753
  /**
2754
   * alias for "UTF8::is_json()"
2755
   *
2756
   * @see UTF8::is_json()
2757
   *
2758
   * @param string $str
2759
   *
2760
   * @return bool
2761
   *
2762
   * @deprecated <p>use "UTF8::is_json()"</p>
2763
   */
2764
  public static function isJson($str)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_json()".
    $result = self::is_json($str);

    return $result;
  }
2768
2769
  /**
2770
   * alias for "UTF8::is_utf16()"
2771
   *
2772
   * @see UTF8::is_utf16()
2773
   *
2774
   * @param string $str
2775
   *
2776
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2777
   *
2778
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2779
   */
2780
  public static function isUtf16($str)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_utf16()".
    $result = self::is_utf16($str);

    return $result;
  }
2784
2785
  /**
2786
   * alias for "UTF8::is_utf32()"
2787
   *
2788
   * @see UTF8::is_utf32()
2789
   *
2790
   * @param string $str
2791
   *
2792
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2793
   *
2794
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2795
   */
2796
  public static function isUtf32($str)
  {
    // Deprecated camelCase spelling: the call is handed straight to "is_utf32()".
    $result = self::is_utf32($str);

    return $result;
  }
2800
2801 53
  /**
2802 6
   * alias for "UTF8::is_utf8()"
2803
   *
2804
   * @see UTF8::is_utf8()
2805 52
   *
2806
   * @param string $str
2807
   * @param bool   $strict
2808
   *
2809
   * @return bool
2810
   *
2811
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2812
   */
2813
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase spelling: both arguments are handed straight to "is_utf8()".
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2817 1
2818
  /**
2819 1
   * Checks if a string is 7 bit ASCII.
2820 1
   *
2821
   * @param string $str <p>The string to check.</p>
2822
   *
2823 1
   * @return bool <p>
2824 1
   *              <strong>true</strong> if it is ASCII<br>
2825 1
   *              <strong>false</strong> otherwise
2826
   *              </p>
2827
   */
2828 1
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // Nothing to inspect: the empty string is reported as ASCII.
    if ($str === '') {
      return true;
    }

    // The string passes when no byte falls outside the allowed set
    // (tab, LF, CR, 0x10, 0x13 and the printable range 0x20-0x7E).
    // NOTE(review): "\x10" and "\x13" look like decimal 10/13 written as hex — confirm the intent.
    $outsideAllowedSet = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return $outsideAllowedSet !== 1;
  }
2838 16
2839
  /**
2840 16
   * Returns true if the string is base64 encoded, false otherwise.
2841
   *
2842 16
   * @param string $str <p>The input string.</p>
2843 4
   *
2844
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2845
   */
2846 16
  public static function is_base64($str)
  {
    $str = (string)$str;

    // The empty string is never reported as base64.
    if (!isset($str[0])) {
      return false;
    }

    // Strict mode makes "base64_decode()" return false for characters outside the alphabet.
    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Compare against false explicitly instead of testing truthiness: a decoded
    // payload of "0" (e.g. from "MA==") is falsy in PHP but perfectly valid.
    // Re-encoding must reproduce the input exactly (canonical form, correct padding).
    return base64_encode($decoded) === $str;
  }
2861
2862
  /**
2863
   * Check if the input is binary... (this looks like a hack).
2864
   *
2865
   * @param mixed $input
2866
   *
2867
   * @return bool
2868
   */
2869
  public static function is_binary($input)
  {
    $input = (string)$input;

    // The empty string is never reported as binary.
    if (!isset($input[0])) {
      return false;
    }

    // A string consisting solely of the characters "0" and "1" counts as binary notation.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // A single NUL byte is enough to classify the input as binary. The former
    // "more than 30% NUL bytes" test was dropped: whenever it was true this
    // check was true as well, so it never influenced the result.
    return substr_count($input, "\x00") > 0;
  }
2892
2893 1
  /**
2894 1
   * Check if the file is binary.
2895 1
   *
2896
   * @param string $file
2897 1
   *
2898
   * @return boolean
2899 1
   */
2900
  public static function is_binary_file($file)
  {
    // "fopen()" reports failure through its return value (plus a warning), not by
    // throwing "\Exception", so the result has to be checked explicitly. Otherwise
    // "fread()" would be called with false instead of a stream.
    $fp = fopen($file, 'rb');
    if ($fp === false) {
      return false;
    }

    // Only the first 512 bytes are sampled.
    $block = fread($fp, 512);
    fclose($fp);

    if ($block === false) {
      return false;
    }

    return self::is_binary($block);
  }
2912
2913 1
  /**
2914 1
   * Checks if the given string is equal to any "Byte Order Mark".
2915
   *
2916
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2917
   *
2918 1
   * @param string $str <p>The input string.</p>
2919
   *
2920 1
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2921
   */
2922 1
  public static function is_bom($str)
  {
    // The known marks are the keys of self::$BOM; the input has to be
    // strictly identical to one of them (same type, same bytes).
    $knownMarks = array_keys(self::$BOM);

    return in_array($str, $knownMarks, true);
  }
2932
2933
  /**
2934
   * Check if the string contains any html-tags <lall>.
2935
   *
2936 1
   * @param string $str <p>The input string.</p>
2937
   *
2938 1
   * @return boolean
2939
   */
2940 1
  public static function is_html($str)
  {
    $str = (string)$str;

    // The empty string cannot contain a tag.
    if ($str === '') {
      return false;
    }

    // One match of an opening, closing or self-closing tag (optionally with attributes) is sufficient.
    $found = preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
2959
2960
  /**
2961
   * Try to check if "$str" is a JSON string.
2962
   *
2963
   * @param string $str <p>The input string.</p>
2964
   *
2965
   * @return bool
2966
   */
2967
  public static function is_json($str)
  {
    $str = (string)$str;

    // The empty string is never reported as JSON.
    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only documents that decode to an object or an array are accepted; scalars and null are not.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // ... and the decoder must not have recorded an error.
    return json_last_error() === JSON_ERROR_NONE;
  }
2991 5
2992
  /**
2993 5
   * Check if the string is UTF-16.
2994 5
   *
2995 5
   * @param string $str <p>The input string.</p>
2996 5
   *
2997 5
   * @return int|false <p>
2998 5
   *                   <strong>false</strong> if it's not UTF-16,<br>
2999 5
   *                   <strong>1</strong> for UTF-16LE,<br>
3000 5
   *                   <strong>2</strong> for UTF-16BE.
3001 4
   *                   </p>
3002 3
   */
3003 3 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that "is_binary()" flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Both byte orders are scored in exactly the same way, little endian first.
    $scores = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // The candidate only scores when a full round trip reproduces the first conversion.
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): each key of the second "count_chars()" result is searched among the
      // VALUES of the first one — confirm against "count_chars()" that this is intended.
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (in_array($char, $inputChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) gives no verdict.
    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3051 2
3052
  /**
3053 3
   * Check if the string is UTF-32.
3054 3
   *
3055 3
   * @param string $str
3056 2
   *
3057 2
   * @return int|false <p>
3058 2
   *                   <strong>false</strong> if it's not UTF-32,<br>
3059 2
   *                   <strong>1</strong> for UTF-32LE,<br>
3060 2
   *                   <strong>2</strong> for UTF-32BE.
3061 2
   *                   </p>
3062 1
   */
3063 1 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that "is_binary()" flags is examined any further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Both byte orders are scored in exactly the same way, little endian first.
    $scores = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $scores[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // The candidate only scores when a full round trip reproduces the first conversion.
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // NOTE(review): each key of the second "count_chars()" result is searched among the
      // VALUES of the first one — confirm against "count_chars()" that this is intended.
      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $ignored) {
        if (in_array($char, $inputChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    // A tie (including 0:0) gives no verdict.
    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3111
3112
  /**
3113
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3114
   *
3115
   * @see    http://hsivonen.iki.fi/php-utf8/
3116
   *
3117
   * @param string $str    <p>The string to be checked.</p>
3118 58
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3119
   *
3120 58
   * @return bool
3121 58
   */
3122
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is reported as valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: input that the UTF-16 / UTF-32 detectors recognise is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the "/u" pattern is used when "pcre_utf8_support()" does NOT report
    // true, which reads inverted relative to the explanation below — confirm the intent.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // The byte length is needed here, so bypass an overloaded "strlen()".
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $tmp = $in;
          $tmp = ($tmp & 0x0000003F) << $shift;
          $mUcs4 |= $tmp;
          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state at this point means the input ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3263
3264
  /**
3265
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3266
   * Decodes a JSON string
3267
   *
3268
   * @link http://php.net/manual/en/function.json-decode.php
3269 2
   *
3270
   * @param string $json    <p>
3271 2
   *                        The <i>json</i> string being decoded.
3272
   *                        </p>
3273 2
   *                        <p>
3274
   *                        This function only works with UTF-8 encoded strings.
3275
   *                        </p>
3276 2
   *                        <p>PHP implements a superset of
3277
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3278
   *                        only supports these values when they are nested inside an array or an object.
3279 2
   *                        </p>
3280
   * @param bool   $assoc   [optional] <p>
3281
   *                        When <b>TRUE</b>, returned objects will be converted into
3282
   *                        associative arrays.
3283
   *                        </p>
3284
   * @param int    $depth   [optional] <p>
3285
   *                        User specified recursion depth.
3286
   *                        </p>
3287
   * @param int    $options [optional] <p>
3288
   *                        Bitmask of JSON decode options. Currently only
3289
   *                        <b>JSON_BIGINT_AS_STRING</b>
3290
   *                        is supported (default is to cast large integers as floats)
3291
   *                        </p>
3292
   *
3293
   * @return mixed the value encoded in <i>json</i> in appropriate
3294
   * PHP type. Values true, false and
3295
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3296
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3297
   * <i>json</i> cannot be decoded or if the encoded
3298
   * data is deeper than the recursion limit.
3299
   */
3300 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is run through "filter()" and forced to a string before decoding.
    $filtered = (string)self::filter($json);

    // The "$options" argument is only forwarded when the PHP 5.4 check passes.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3312
3313
  /**
3314
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3315
   * Returns the JSON representation of a value.
3316
   *
3317
   * @link http://php.net/manual/en/function.json-encode.php
3318 2
   *
3319
   * @param mixed $value   <p>
3320 2
   *                       The <i>value</i> being encoded. Can be any type except
3321
   *                       a resource.
3322 2
   *                       </p>
3323
   *                       <p>
3324
   *                       All string data must be UTF-8 encoded.
3325 2
   *                       </p>
3326
   *                       <p>PHP implements a superset of
3327
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3328 2
   *                       only supports these values when they are nested inside an array or an object.
3329
   *                       </p>
3330
   * @param int   $options [optional] <p>
3331
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3332
   *                       <b>JSON_HEX_TAG</b>,
3333
   *                       <b>JSON_HEX_AMP</b>,
3334
   *                       <b>JSON_HEX_APOS</b>,
3335
   *                       <b>JSON_NUMERIC_CHECK</b>,
3336
   *                       <b>JSON_PRETTY_PRINT</b>,
3337
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3338
   *                       <b>JSON_FORCE_OBJECT</b>,
3339
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3340 7
   *                       constants is described on
3341
   *                       the JSON constants page.
3342 7
   *                       </p>
3343 7
   * @param int   $depth   [optional] <p>
3344
   *                       Set the maximum depth. Must be greater than zero.
3345
   *                       </p>
3346
   *
3347 7
   * @return string|false A JSON encoded string on success or <b>FALSE</b> on failure.
3348 7
   */
3349 7 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is run through "filter()" before encoding.
    $filtered = self::filter($value);

    // The "$depth" argument is only forwarded when the PHP 5.5 check passes.
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3361
3362
  /**
3363
   * Makes string's first char lowercase.
3364
   *
3365
   * @param string $str <p>The input string</p>
3366
   * @param string  $encoding  [optional] <p>Set the charset.</p>
3367 1
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3368
   *
3369 1
   * @return string <p>The resulting string</p>
3370
   */
3371
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything behind the first character is kept as it is
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($rest === false) {
      $rest = '';
    }

    // only the first character is lower-cased
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($firstChar, $encoding, $cleanUtf8) . $rest;
  }
3386 1
3387
  /**
3388
   * alias for "UTF8::lcfirst()"
3389 1
   *
3390 1
   * @see UTF8::lcfirst()
3391
   *
3392 1
   * @param string  $word
3393 1
   * @param string  $encoding
3394 1
   * @param boolean $cleanUtf8
3395 1
   *
3396
   * @return string
3397
   */
3398 1
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all the work is done by UTF8::lcfirst()
    $lowerCased = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowerCased;
  }
3402
3403
  /**
3404
   * Lowercase for all words in the string.
3405
   *
3406 1
   * @param string   $str        <p>The input string.</p>
3407
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3408
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
3409 1
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3410 1
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3411 1
   *
3412 1
   * @return string
3413 1
   */
3414 1
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Only a really empty input is short-circuited. A loose "!$str" check
    // would also swallow the valid input "0".
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    // without exceptions every word gets lower-cased
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty chunks, but keep a literal "0" (which "!$word" would drop)
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // the chunks are glued together again without any extra separator
    return implode('', $newWords);
  }
3452
3453 1
  /**
3454
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3455 1
   *
3456 1
   * @param string $str   <p>The string to be trimmed</p>
3457 1
   * @param string $chars <p>Optional characters to be stripped</p>
3458
   *
3459 1
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3460
   */
3461 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no char list given" marker; other empty values are treated the same way.
    // The string "0" is excluded from that rule on purpose: it is a valid char list
    // and a plain "!$chars" check would silently ignore it.
    //
    // Default: strip Unicode separators (\pZ) and "other" chars like controls (\pC).
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    // strip the given chars from the beginning of the string
    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3476
3477 1
  /**
3478
   * Returns the UTF-8 character with the maximum code point in the given data.
3479
   *
3480
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3481
   *
3482
   * @return string <p>The character with the highest code point in the given data.</p>
3483
   */
3484 View Code Duplication
  public static function max($arg)
  {
    // an array of strings is handled like one big string
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // Empty input: the native max() must not be called with an empty array
    // (warning on older PHP versions, ValueError since PHP 8), so there is
    // simply no character to return.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3492
3493 15
  /**
3494
   * Calculates and returns the maximum number of bytes taken by any
3495
   * UTF-8 encoded character in the given string.
3496
   *
3497
   * @param string $str <p>The original Unicode string.</p>
3498
   *
3499
   * @return int <p>Max byte lengths of the given chars.</p>
3500
   */
3501
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    // no entries => nothing to measure
    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3510
3511
  /**
3512
   * Checks whether mbstring is available on the server.
3513
   *
3514
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3515
   */
3516
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') === false) {
      return false;
    }

    // side effect: switch the internal encoding of mbstring to UTF-8
    \mb_internal_encoding('UTF-8');

    return true;
  }
3526
3527
  /**
3528
   * Returns the UTF-8 character with the minimum code point in the given data.
3529
   *
3530
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3531
   *
3532
   * @return string <p>The character with the lowest code point in the given data.</p>
3533
   */
3534 View Code Duplication
  public static function min($arg)
  {
    // an array of strings is handled like one big string
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // Empty input: the native min() must not be called with an empty array
    // (warning on older PHP versions, ValueError since PHP 8), so there is
    // simply no character to return.
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3542 3
3543
  /**
3544
   * alias for "UTF8::normalize_encoding()"
3545 76
   *
3546 1
   * @see UTF8::normalize_encoding()
3547
   *
3548
   * @param string $encoding
3549 76
   * @param mixed  $fallback
3550 6
   *
3551
   * @return string
3552
   *
3553 75
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3554 74
   */
3555
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    // deprecated camelCase alias, forwards to the snake_case method
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3559 5
3560
  /**
3561
   * Normalize the encoding-"name" input.
3562 5
   *
3563 5
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3564 5
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3565 5
   *
3566 5
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
3567 5
   */
3568 5
  public static function normalize_encoding($encoding, $fallback = false)
  {
    // results per original input name
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    // Alias table, keyed by the upper-cased name without punctuation.
    // It is static, so it is not rebuilt on every cache miss.
    static $EQUIVALENCES = array(
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // nothing given => use the fallback as it is
    if (!$encoding) {
      return $fallback;
    }

    // fast paths: names that are returned without any normalization
    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // lookup key: upper-cased name, reduced to letters, digits and whitespace
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9\s]/', '', $encoding);

    // unknown names are returned upper-cased, known aliases in canonical form
    if (isset($EQUIVALENCES[$encodingUpperHelper])) {
      $encoding = $EQUIVALENCES[$encodingUpperHelper];
    }

    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3666
3667
  /**
3668 12
   * Normalize some MS Word special characters.
3669
   *
3670 12
   * @param string $str <p>The string to be normalized.</p>
3671
   *
3672 12
   * @return string
3673 1
   */
3674 View Code Duplication
  public static function normalize_msword($str)
  {
    // search / replace lists, built once from the UTF8_MSWORD table
    static $search = null;
    static $replace = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($search === null) {
      $search = array_keys(self::$UTF8_MSWORD);
      $replace = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($search, $replace, $str);
  }
3692
3693
  /**
3694
   * Normalize the whitespace.
3695
   *
3696
   * @param string $str                     <p>The string to be normalized.</p>
3697
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3698
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3699
   *                                        bidirectional text chars.</p>
3700
   *
3701
   * @return string
3702
   */
3703
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // prepared search lists, kept between calls
    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // one whitespace list per "keep non-breaking space" setting
    $cacheKey = (int)$keepNonBreakingSpace;
    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNonBreakingSpace === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    // the bidirectional control chars are removed, not replaced by a space
    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    // every remaining whitespace variant becomes a plain space
    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3738
3739 1
  /**
3740 1
   * Strip all whitespace characters. This includes tabs and newline
3741 1
   * characters, as well as multibyte whitespace such as the thin space
3742 1
   * and ideographic space.
3743
   *
3744 23
   * @param string $str
3745
   *
3746
   * @return string
3747
   */
3748 23
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    // nothing to strip in an empty string
    if ($input === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    return (string)$stripped;
  }
3758 23
3759
  /**
3760
   * Format a number with grouped thousands.
3761 10
   *
3762
   * @param float  $number
3763 10
   * @param int    $decimals
3764 10
   * @param string $dec_point
3765
   * @param string $thousands_sep
3766 10
   *
3767 1
   * @return string
3768
   *
3769
   * @deprecated <p>This has nothing to do with UTF-8.</p>
3770 10
   */
3771 4
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // both separators are longer than one byte (e.g. multi-byte UTF-8 chars)
    if (
        isset($thousands_sep[1], $dec_point[1])
        &&
        Bootup::is_php('5.4') === true
    ) {
      // Format with the plain ASCII separators first and swap them afterwards.
      // strtr() replaces both placeholders in a single pass, so a "," or "."
      // inside the new separators is never replaced a second time
      // (a sequential str_replace() would corrupt e.g. $dec_point = ', ').
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3797 1
3798 1
  /**
3799 1
   * Calculates Unicode code point of the given UTF-8 encoded character.
3800
   *
3801
   * INFO: opposite to UTF8::chr()
3802 1
   *
3803 1
   * @param string      $chr      <p>The character of which to calculate code point.</p>
3804 1
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
3805
   *
3806
   * @return int <p>
3807 1
   *             Unicode code point of the given character,<br>
3808
   *             0 on invalid UTF-8 byte sequence.
3809
   *             </p>
3810
   */
3811
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();
    $encoding = (string)$encoding;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The cache key is built from the unconverted input plus the normalized
    // encoding name, so the lookup is done before the (more expensive)
    // charset conversion.
    $cacheKey = $chr . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    // everything below works on UTF-8 bytes
    if ($encoding !== 'UTF-8') {
      $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $code = \IntlChar::ord($chr);
      // a falsy result falls through to the manual decoding below
      if ($code) {
        return $CHAR_CACHE[$cacheKey] = $code;
      }
    }

    // 1-based list of the first (up to) four bytes
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $code = $bytes ? $bytes[1] : 0;

    // 4-byte sequence
    if (0xF0 <= $code && isset($bytes[4])) {
      return $CHAR_CACHE[$cacheKey] = (($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    }

    // 3-byte sequence
    if (0xE0 <= $code && isset($bytes[3])) {
      return $CHAR_CACHE[$cacheKey] = (($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    }

    // 2-byte sequence
    if (0xC0 <= $code && isset($bytes[2])) {
      return $CHAR_CACHE[$cacheKey] = (($code - 0xC0) << 6) + $bytes[2] - 0x80;
    }

    // single byte (or empty input => 0)
    return $CHAR_CACHE[$cacheKey] = $code;
  }
3864 1
3865 1
  /**
3866 1
   * Parses the string into an array (into the second parameter).
3867
   *
3868
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3869
   *          if the second parameter is not set!
3870
   *
3871
   * @link http://php.net/manual/en/function.parse-str.php
3872
   *
3873
   * @param string  $str       <p>The input string.</p>
3874
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
3875
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3876
   *
3877
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3878
   */
3879
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    // optional cleanup via UTF8::clean() before parsing
    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($input, $result);

    // success means: the parser did not fail AND it produced something
    return $parsed !== false && !empty($result);
  }
3893 1
3894
  /**
3895
   * Checks if the "/u" modifier is available that enables Unicode support in PCRE.
3896 2
   *
3897 2
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3898 1
   */
3899 1
  public static function pcre_utf8_support()
  {
    // try an empty pattern with the "u" modifier, errors are silenced
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3904 2
3905
  /**
3906 2
   * Create an array containing a range of UTF-8 characters.
3907 2
   *
3908 2
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3909 2
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3910
   *
3911 2
   * @return array
3912 2
   */
3913 2
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve both bounds (start first, then end) to code points
    $bounds = array();
    foreach (array($var1, $var2) as $var) {
      if (ctype_digit((string)$var)) {
        // decimal number
        $codePoint = (int)$var;
      } elseif (ctype_xdigit($var)) {
        // hexadecimal number
        $codePoint = (int)self::hex_to_int($var);
      } else {
        // anything else is passed to UTF8::ord()
        $codePoint = self::ord($var);
      }

      // a bound that resolves to nothing usable ends the whole operation
      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3951 40
3952 40
  /**
3953 5
   * Multi decode html entity & fix urlencoded-win1252-chars.
3954 5
   *
3955
   * e.g:
3956
   * 'test+test'                     => 'test+test'
3957 5
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3958 5
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3959 40
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3960
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3961 40
   * 'Düsseldorf'                   => 'Düsseldorf'
3962
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3963
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3964
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3965
   *
3966
   * @param string $str          <p>The input string.</p>
3967
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3968
   *
3969
   * @return string
3970
   */
3971 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // turn "%uXXXX" escapes into numeric html entities
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', rawurldecode($str));
    }

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode until nothing changes anymore (or only once, if multi-decode is off)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
4002
4003
  /**
4004 57
   * alias for "UTF8::remove_bom()"
4005
   *
4006
   * @see UTF8::remove_bom()
4007
   *
4008 57
   * @param string $str
4009 57
   *
4010 57
   * @return string
4011 57
   *
4012
   * @deprecated <p>use "UTF8::remove_bom()"</p>
4013 57
   */
4014
  public static function removeBOM($str)
  {
    // deprecated camelCase alias, forwards to the snake_case method
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4018
4019 57
  /**
4020
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4021
   *
4022
   * @param string $str <p>The input string.</p>
4023
   *
4024
   * @return string <p>String without UTF-BOM</p>
4025
   */
4026
  public static function remove_bom($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a BOM at the very beginning (byte offset 0) is of interest
      if (0 !== self::strpos($str, $bomString, 0, '8BIT')) {
        continue;
      }

      // cut off the BOM bytes
      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = $remainder === false ? '' : (string)$remainder;
    }

    return $str;
  }
4046
4047
  /**
4048
   * Removes duplicate occurrences of a string in another string.
4049 57
   *
4050 57
   * @param string          $str  <p>The base string.</p>
4051
   * @param string|string[] $what <p>String to search for in the base string.</p>
4052 57
   *
4053 57
   * @return string <p>The result string with removed duplicates.</p>
4054 57
   */
4055
  public static function remove_duplicates($str, $what = ' ')
  {
    // a single string is handled like a list with one entry,
    // everything that is neither string nor array leaves the input untouched
    if (is_string($what) === true) {
      $needles = array($what);
    } elseif (is_array($what) === true) {
      $needles = $what;
    } else {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($needles as $needle) {
      // collapse every run of the needle into a single occurrence
      $runPattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($runPattern, $needle, $str);
    }

    return $str;
  }
4070
4071
  /**
4072
   * Remove invisible characters from a string.
4073
   *
4074
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4075
   *
4076
   * copy & paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4077 23
   *
4078
   * @param string $str
4079 23
   * @param bool   $url_encoded
4080
   * @param string $replacement
4081 23
   *
4082 5
   * @return string
4083
   */
4084
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // Hex digits in percent-escapes are case-insensitive ("%0b" === "%0B"),
      // so the patterns must be too, otherwise the upper-case form slips through.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is found anymore: removing one sequence
    // can glue a new one together (e.g. "%0%0bb")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4104
4105 60
  /**
4106
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4107 60
   *
4108 48
   * @param string $str                <p>The input string</p>
4109
   * @param string $replacementChar    <p>The replacement character.</p>
4110
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4111
   *
4112 19
   * @return string
4113
   */
4114
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is passed to mbstring as 'none'
      $substitute = $replacementChar === '' ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // temporarily change the mbstring substitute character, convert
      // UTF-8 to UTF-8 with it and restore the previous setting afterwards
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    // finally replace the replacement character itself
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($search, $replace, $str);
  }
4150
4151
  /**
4152
   * Strip whitespace or other characters from end of a UTF-8 string.
4153
   *
4154
   * @param string $str   <p>The string to be trimmed.</p>
4155
   * @param string $chars <p>Optional characters to be stripped.</p>
4156
   *
4157
   * @return string <p>The string with unwanted characters stripped from the right.</p>
4158
   */
4159 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INF is the "no char list given" marker; other empty values are treated the same way.
    // The string "0" is excluded from that rule on purpose: it is a valid char list
    // and a plain "!$chars" check would silently ignore it.
    //
    // Default: strip Unicode separators (\pZ) and "other" chars like controls (\pC).
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    // strip the given chars from the end of the string
    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4174
4175 1
  /**
4176 1
   * rxClass
4177 1
   *
4178 1
   * @param string $s
4179
   * @param string $class
4180
   *
4181 1
   * @return string
4182
   */
4183
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;
    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // slot 0 collects the content of the character class,
    // all further slots are alternatives that do not fit into it
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // a dash is moved to the front of the class
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // at most two bytes: quote it for the "/" delimiter
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // three or more bytes, but a length of 1 according to UTF8::strlen()
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    if (1 === count($parts)) {
      $pattern = $parts[0];
    } else {
      $pattern = '(?:' . implode('|', $parts) . ')';
    }

    return $RX_CLASSS_CACHE[$cacheKey] = $pattern;
  }
4223
4224 38
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes every entry of the internal support-table, each one followed by "\n<br>".
   */
  public static function showSupport()
  {
    // run the support-check first, if its "already checked" flag is not set
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $separator = "\n<br>";

    foreach (self::$SUPPORT as $supportValue) {
      echo $supportValue . $separator;
    }
  }
4237
4238
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is handed back untouched, if the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encodings are normalized
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4269
4270
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through UTF8::clean() before splitting.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "." with the modifiers "u" + "s" matches every single code point, newlines included
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }

    } else {

      // fallback: walk over the bytes and collect well-formed UTF-8 sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // byte length, even if "mbstring.func_overload" replaced strlen()
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteCount = \mb_strlen($str, '8BIT');
      } else {
        $byteCount = strlen($str);
      }

      for ($i = 0; $i < $byteCount; $i++) {

        $lead = $str[$i];

        // 0xxxxxxx => plain ASCII byte
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          continue;
        }

        // the lead byte tells how many continuation bytes have to follow
        if (($lead & "\xE0") === "\xC0") {
          $trailing = 1; // 110xxxxx
        } elseif (($lead & "\xF0") === "\xE0") {
          $trailing = 2; // 1110xxxx
        } elseif (($lead & "\xF8") === "\xF0") {
          $trailing = 3; // 11110xxx
        } else {
          continue; // stray continuation byte or invalid lead byte: skipped
        }

        // sequence is cut off at the end of the string: the lead byte is skipped
        if (!isset($str[$i + $trailing])) {
          continue;
        }

        // every continuation byte must look like 10xxxxxx
        $sequence = $lead;
        $wellFormed = true;
        for ($j = 1; $j <= $trailing; $j++) {
          $next = $str[$i + $j];
          if (($next & "\xC0") !== "\x80") {
            $wellFormed = false;
            break;
          }
          $sequence .= $next;
        }

        // a malformed sequence only drops its lead byte, the following bytes are re-examined
        if ($wellFormed === true) {
          $chars[] = $sequence;
          $i += $trailing;
        }
      }
    }

    // glue the single characters together into chunks of "$length" characters
    if ($length > 1) {
      $chunks = array();
      foreach (array_chunk($chars, $length) as $chunk) {
        $chunks[] = implode('', $chunk);
      }

      return $chunks;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4393 7
4394 7
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (only for strings that UTF8::is_binary()
   * reports as binary), ASCII, UTF-8, "\mb_detect_encoding()" with a fixed candidate list
   * and finally an "iconv()" round-trip over the encodings from self::$ICONV_ENCODING.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // every detector is called only once, its result is checked for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }
    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted if converting the string from that encoding
    // into the very same encoding leaves it unchanged (compared via md5).
    //

    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4494 26
4495
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the last "length of needle" characters of the haystack
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $needle === $tail;
  }
4523
4524 2
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the last "length of needle" characters of the haystack
    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // UTF8::substr() may report a failure via "false", which must not
    // be handed over to the string comparison (same guard as in str_ends_with())
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4547
4548 1
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is turned into a quoted, case-insensitive UTF-8 regex and the
   * replacement is done via "preg_replace()". The replacement values are taken literally:
   * sequences like "$1" or "\1" are not interpreted as regex back-references.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); an array is paired with the
   *                       search array in order.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build the patterns in a fresh array (no by-reference loop, no dangling reference)
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // an empty search term must never match: this pattern is unmatchable
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // "\" and "$" start back-references in a preg-replacement, so escape them
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escapeMap);
      }
    } else {
      $replacement = strtr((string)$replace, $escapeMap);
    }

    // dedicated counter instead of recycling "$replace" as output variable
    $replaced = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replaced);
    $count = $replaced;

    return $subject;
  }
4591 2
4592
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match at position 0 means: the haystack begins with the needle
    return self::stripos($haystack, $needle) === 0;
  }
4615 2
4616 2
  /**
   * Limit the number of characters in a string, but also after the next word.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max number of characters that are kept from the input.</p>
   * @param string $strAddOn <p>Appended to the result, if the input was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit sits directly behind a space: cut in front of that space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // hard cut, then throw away the last (probably incomplete) word
    $str = (string)self::substr($str, 0, $length);
    $words = explode(' ', $str);
    $withoutLastWord = implode(' ', array_slice($words, 0, -1));

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // nothing is left in front of the last word: fall back to a plain cut
    return (string)self::substr($str, 0, $length - 1) . $strAddOn;
  }
4656
4657
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if "$pad_length" is not an integer, is not positive,
   * is smaller than the length of the input, or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    // An empty pad-string can not fill anything (and would cause a division by zero below).
    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      return $str;
    }

    // number of characters that are missing in total
    $diff = $pad_length - $str_length;

    // how many characters go to the left / to the right
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $preLength = $diff;
        $postLength = 0;
        break;

      case STR_PAD_BOTH:
        // cast the result of the division (not "$diff" itself), so that the length stays an integer;
        // for an odd "$diff" the right side gets the extra character
        $preLength = (int)($diff / 2);
        $postLength = $diff - $preLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $preLength = 0;
        $postLength = $diff;
    }

    // repeat the pad-string often enough and cut it to the exact length
    $pre = '';
    if ($preLength > 0) {
      $pre = str_repeat($pad_string, (int)ceil($preLength / $ps_length));
      $pre = (string)self::substr($pre, 0, $preLength);
    }

    $post = '';
    if ($postLength > 0) {
      $post = str_repeat($pad_string, (int)ceil($postLength / $ps_length));
      $post = (string)self::substr($post, 0, $postLength);
    }

    return $pre . $str . $post;
  }
4711
4712
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    // the native function does the actual work
    return str_repeat(self::filter($str), $multiplier);
  }
4736
4737 1
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation, "$count" is filled by the native function
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4770 21
4771
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first hit.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The subject itself is returned, if the term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    if ($firstHit === false) {
      return $subject;
    }

    // swap exactly the characters of the first hit
    return self::substr_replace($subject, $replace, $firstHit, self::strlen($search));
  }
4790 1
4791
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // shuffle the characters as returned by UTF8::split(), then glue them together again
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4806
4807 2
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    // back from code points to a string
    return self::string($codePoints);
  }
4832 1
4833
  /**
   * Split a string into an array.
   *
   * The string is split with the grapheme-cluster regex of this class, so the
   * elements are built from what that regex matches as one unit.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of matched units per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    // invalid length: hand it over to the native function and return whatever it answers
    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $units = $matches[0];

    if ($len === 1) {
      return $units;
    }

    // join every "$len" units into one element
    $joined = array();
    foreach (array_chunk($units, $len) as $chunk) {
      $joined[] = implode('', $chunk);
    }

    return $joined;
  }
4877
4878
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a match at position 0 means: the haystack begins with the needle
    return self::strpos($haystack, $needle) === 0;
  }
4901
4902
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number which is written in base 2,
   * without leading zeros. An empty string (or a string of NUL bytes only) results in "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string that consists of "0" and "1".</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '0';
    }

    // Convert byte by byte: "base_convert()" works on floats internally and
    // silently loses precision as soon as the input is longer than a few bytes.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // same format as before: a number has no leading zeros
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4917
4918
  /**
   * Convert a string into an array of words.
   *
   * The string is split with "PREG_SPLIT_DELIM_CAPTURE", so the result contains the words
   * as well as the text between them. Letters ("\pL") plus the chars of "$charList" build
   * a word; the regex also keeps parts together that are joined by a dash or an apostrophe.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>Remove values with at most this many characters.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    $wordChars = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested: hand back the raw result of the split
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      $isBlank = $removeEmptyValues === true && trim($part) === '';

      if ($isTooShort === false && $isBlank === false) {
        $filtered[] = $part;
      }
    }

    return $filtered;
  }
4979
4980
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // all arguments are handed over unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4995
4996
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are read from the odd indexes of this list, the text between them from the even ones
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    // every other format (the integers 1 and 2 are compared strictly): only count the words
    if ($format !== 1 && $format !== 2) {
      return ($total - 1) / 2;
    }

    $words = array();

    if ($format === 1) {
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // format 2: the key is the character-offset of the word inside the input
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $total; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
5039 2
5040
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    // both sides go through UTF8::strtocasefold() first, then they are compared via UTF8::strcmp()
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5058
5059
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all arguments are handed over unchanged
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5076
5077
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are not byte-identical are compared in their NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // shortcut: identical strings need no normalization
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5097
5098
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The chars that end the initial segment.</p>
   * @param int    $offset   <p>Start position inside the input string.</p>
   * @param int    $length   <p>Length of the part of the input string that is examined.</p>
   *
   * @return int|null <p>
   *                  <strong>null</strong> is returned for an empty char list, an empty
   *                  (sub-)string or if the sub-string could not be extracted.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // work on the requested part of the string only
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first char from the list
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // none of the chars occurs: the whole string is the segment
    return self::strlen($str);
  }
5134
5135 2
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all arguments are handed over unchanged
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5152
5153
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    // every code point is converted via UTF8::chr() and appended in the given order
    $result = '';
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5175
5176
  /**
   * Checks whether the string starts with one of the known "BOM" (Byte Order Mark) sequences.
   *
   * The candidate sequences are the keys of self::$BOM.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$BOM) as $bomSequence) {
      // position 0 means the sequence is a prefix of the input
      if (strpos($str, $bomSequence) === 0) {
        return true;
      }
    }

    return false;
  }
5193
5194
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped. HTML comments and PHP tags
   *                                are always stripped; this can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string (empty string for empty input).</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // cleaning happens before stripping, and only on explicit request
    $input = ($cleanUtf8 === true) ? self::clean($input) : $input;

    return strip_tags($input, $allowable_tags);
  }
5228
5229 17
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * Uses "grapheme_stripos()" for UTF-8 when the intl extension is reported as available,
   * otherwise "mb_stripos()" (native or polyfill).
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching (null is treated as 0).</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (also false for an empty haystack or needle).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $treatAsUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $treatAsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   &&
                   self::$SUPPORT['intl'] === true
                   &&
                   Bootup::is_php('5.4') === true;

    if (!$useGrapheme) {
      // fallback to "mb_"-function via polyfill
      return \mb_stripos($haystack, $needle, $offset, $encoding);
    }

    return \grapheme_stripos($haystack, $needle, $offset);
  }
5289
5290
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, returns the part of the haystack before
   *                               the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also false for an empty haystack or needle).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Only a needle that became empty through cleaning short-circuits here.
    // A truthiness test would wrongly treat the valid needle "0" as empty.
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // TODO: evaluate a native stristr() fast path when haystack and needle are both ASCII

    // fallback via vanilla php: lazily capture everything in front of the first match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
5372
5373
  /**
   * Get the string length, not the byte-length!
   *
   * Backends are tried in this order: byte length for ASCII / CP850 / 8BIT, iconv (non UTF-8
   * without mbstring), mbstring, iconv, intl (UTF-8 only), plain strlen() for ASCII-only input,
   * a regex based count and finally "mb_strlen()" via polyfill.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $treatAsUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $treatAsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte encodings: the length is the byte count
    // (loose comparison on purpose, same semantics as a "switch" statement)
    if ($encoding == 'ASCII' || $encoding == 'CP850' || $encoding == '8BIT') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return strlen($str);
      }

      return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" return a wrong length
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }

      if (self::$SUPPORT['iconv'] === true) {
        return \iconv_strlen($str, $encoding);
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;
    if ($canUseIntl) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);

    // a zero count falls through to the "mb_"-function via polyfill
    return $charCount !== 0 ? $charCount : \mb_strlen($str, $encoding);
  }
5480
5481
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both inputs are passed through UTF8::strtocasefold() and then compared via UTF8::strnatcmp().
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5497
5498
  /**
   * String comparisons using a "natural order" algorithm.
   *
   * Identical strings short-circuit to 0; otherwise both inputs are passed through
   * UTF8::strtonatfold() and compared with PHP's native strnatcmp().
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5516
5517 58
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both inputs are passed through UTF8::strtocasefold() and then compared via UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5534
5535
  /**
   * String comparison of the first n characters.
   *
   * Both inputs are truncated with UTF8::substr() and the prefixes are compared via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // a failed substr() (false) is compared as an empty string
    $prefix1 = (string)self::substr($str1, 0, $len);
    $prefix2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
5555
5556 57
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also false for an empty haystack or char_list).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // cut the haystack at the first occurrence of the matched text
    $cutAt = strpos($haystack, $m[0]);

    return substr($haystack, $cutAt);
  }
5581
5582
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle, so a code point is converted to
    // its character here. This has to happen BEFORE the string cast below, otherwise the
    // strict type check can never match (same order as in strripos() / strrpos()).
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither extension that understands other encodings is available
    // (same condition as in strlen())
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // Translate "count from the end" into an absolute start position, so that the
      // returned position refers to the original haystack and not to the substring.
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte position inside the substring ...
    $pos = strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted by the skipped characters
    return $offset + self::strlen(substr($haystackTmp, 0, $pos));
  }
5727
5728 1
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around "mb_strrchr()" with optional encoding normalization and UTF-8 cleaning.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5767 1
5768
  /**
   * Reverses characters order in the string.
   *
   * The string is split via UTF8::split() and the parts are joined in reverse order.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5785
5786 1
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around "mb_strrichr()" with optional encoding normalization and UTF-8 cleaning.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end.
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" return a wrong position
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5824
5825
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Backends in order: "mb_strripos()", "grapheme_strripos()" (UTF-8 with intl only) and finally
   * UTF8::strrpos() on the upper-cased haystack and needle.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted to a character first
    $needleIsCodePoint = (int)$needle === $needle && $needle >= 0;
    if ($needleIsCodePoint) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $treatAsUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $treatAsUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;
    if ($canUseIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: case-sensitive search on upper-cased copies
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5905
5906
  /**
   * Locate the last occurrence of a needle inside a string and return its character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string that is searched.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is treated as a code point
   *                              and converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Start the search this many characters into the string. A negative
   *                              value stops the search that many characters before the end of the string.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>Position of the last occurrence of needle in haystack,<br>or <strong>false</strong>
   *                   if the needle was not found or one of the inputs is empty.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // normalize the input types
    $offset = (int)$offset;
    $needle = (string)$needle;
    $haystack = (string)$haystack;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-checks on $encoding are only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $encoding = in_array($encoding, array('UTF-8', true, false), true)
        ? 'UTF-8'
        : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: first cut the haystack down to the part that is searched
    $searchArea = null;
    if ($offset > 0) {
      $searchArea = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $searchArea = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($searchArea !== null) {
      $haystack = ($searchArea === false) ? '' : (string)$searchArea;
    }

    $bytePos = strrpos($haystack, $needle);

    // translate the byte position into a character position, relative to the original haystack
    return ($bytePos === false) ? false : $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
6008
6009
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars. Non-string scalars are cast to string.</p>
   * @param int    $offset [optional] <p>Passed to UTF8::substr() to select the examined part of the string.</p>
   * @param int    $length [optional] <p>Passed to UTF8::substr() to select the examined part of the string.</p>
   *
   * @return int <p>Length of the leading segment, or 0 if the string or the mask is empty or nothing matches.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    if ($offset || $length !== null) {
      $strTmp = self::substr($str, $offset, $length);
      if ($strTmp === false) {
        $strTmp = '';
      }
      $str = (string)$strTmp;
    }

    $str = (string)$str;
    // cast the mask as well: "isset($mask[0])" is always false for e.g. integers,
    // which silently turned a mask like 1 into "no match"
    $mask = (string)$mask;

    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // use a dedicated variable for the matches instead of overwriting the subject
    if (preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
6037
6038
  /**
   * Get the part of the haystack that starts at the first occurrence of the needle.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack in front of the first occurrence
   *                               of the needle is returned instead (the needle itself is excluded).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>A sub-string,<br>or <strong>false</strong> if the needle is not found or one of
   *                      the inputs is empty.</p>
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $needle = (string)$needle;
    $haystack = (string)$haystack;

    // nothing to search for / nothing to search in
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = ($encoding === 'UTF-8');

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // INFO: the "grapheme_"-functions can't handle other encodings
    if ($isUtf8 && self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: capture everything (non-greedy) in front of the first needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
6110
6111 23
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace the full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded string, lowercased via UTF8::strtolower().</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // per-request caches for the replacement tables
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // split the common table into search / replace lists only once
    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      // the full table is loaded lazily, on first use
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6162
6163 19
  /**
   * Convert a string to lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // no language specific handling requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true) {

      // language specific transliterator id, e.g. "tr-Lower"
      $transliteratorId = $lang . '-Lower';
      if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
        trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

        // fall back to the generic transliterator
        $transliteratorId = 'Any-Lower';
      }

      return transliterator_transliterate($transliteratorId, $str);
    }

    trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

    return \mb_strtolower($str, $encoding);
  }
6220
6221
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and all characters of the unicode category "Mn" are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6233
6234 1
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // language specific transliterator id, e.g. "tr-Upper"
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // fall back to the generic transliterator
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning must name this method (it was copy-pasted from strtolower() before)
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6290
6291 1
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from. A string is split into its characters,
   *                              an array is used as a list of search strings. If $to is omitted, an array
   *                              is used as a "search => replacement" map and a string is removed from
   *                              $str.</p>
   * @param string|string[] $to   <p>The string being translated to. A string is split into its characters,
   *                              an array is used as a list of replacement strings.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // identical search and replacement: nothing to do
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // only split non-array input: "str_split()" works on strings,
      // arrays are already lists of search / replacement strings
      if (!is_array($from)) {
        $from = self::str_split($from);
      }

      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // both lists need the same length, so the longer one is cut down
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    // only reachable if $to was omitted: remove the search string
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6338
6339 76
  /**
   * Get the width of a string, as reported by "\mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // iconv and mbstring are not tolerant to invalid encoding
    // further, their behaviour is inconsistent with that of PHP's substr
    $subject = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($subject, $charset);
  }
6363
6364
  /**
   * Change the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                  or <strong>CASE_LOWER</strong> (default); every other value is handled like
   *                  <strong>CASE_UPPER</strong></p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // everything except CASE_LOWER means "uppercase"
    $toLower = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $key => $value) {
      $convertedKey = $toLower ? self::strtolower($key) : self::strtoupper($key);

      $result[$convertedKey] = $value;
    }

    return $result;
  }
6401 49
6402
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // the string length is only needed for the offset check and as default for the length
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = (int)self::strlen($str, $encoding);
    }

    // Impossible
    if ($offset && $offset > $str_length) {
      return false;
    }

    // from here on $length is always an integer
    if ($length === null) {
      $length = $str_length;
    } else {
      $length = (int)$length;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is handled by the byte-wise "substr()", as long as it isn't overloaded by mbstring
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_substr($str, $offset, $length);
    }

    // plain ASCII can be cut byte-wise
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $offset, $length));
  }
6524
6525
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, the first string is cut via UTF8::substr() and the second string is
   * shortened to the length of that part before both are compared.
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $compareWholeStrings = ($offset === 0 && $length === null);

    if (!$compareWholeStrings) {
      // cut the requested part out of the main string ...
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : (string)$mainPart;

      // ... and limit the secondary string to the same number of characters
      $secondaryPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($secondaryPart === false) ? '' : (string)$secondaryPart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6569 1
6570
  /**
   * Count how often a substring occurs in a string.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring.
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or <strong>false</strong> if the haystack or the needle
   *                   is empty.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $needle = (string)$needle;
    $haystack = (string)$haystack;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      // without an explicit length the whole haystack length is used
      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      $windowIsInvalid = $length !== 0 && $offset !== 0 && $length + $offset <= 0;

      // output from "substr_count()" have changed in PHP 7.1
      if ($windowIsInvalid && Bootup::is_php('7.1') === false) {
        return false;
      }

      // count only inside of the requested part of the haystack
      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match is one occurrence
    $pattern = '/' . preg_quote($needle, '/') . '/us';
    preg_match_all($pattern, $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6659
6660
  /**
   * Remove a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it doesn't start with
   *                the needle.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove: empty needle, or the haystack doesn't start with it
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6692
6693
  /**
   * Remove a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it doesn't end with
   *                the needle.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove: empty needle, or the haystack doesn't end with it
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6725 1
6726 1
  /**
   * Remove a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it doesn't start with
   *                the needle.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to remove: empty needle, or the haystack doesn't start with it
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6758
6759
  /**
   * Replace text within a portion of a string (character-based for non-ASCII input).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * For array input every element of $str is processed by a recursive call, paired with the
   * replacement / offset / length at the same position. Scalar arguments are repeated for every element.
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings. If $str is a
   *                                          string and an array is passed, only its element 0 is used.</p>
   * @param int|int[]       $offset           <p>
   *                                          If positive, the replacing begins at that offset into the string.
   *                                          <br><br>
   *                                          If negative, the replacing begins at that many characters from the
   *                                          end of the string.
   *                                          <br><br>
   *                                          Non-integer entries of an offset array are treated as 0.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If positive, the length of the portion to replace.
   *                                          If negative, the number of characters from the end of the string at
   *                                          which to stop replacing. If zero, the replacement is inserted at
   *                                          the offset. If omitted and $str is a string, everything up to the
   *                                          end of the string is replaced. If omitted and $str is an array, a
   *                                          length of 0 is used for every element. In a length array, null
   *                                          entries become 0 and other non-integer entries become the number
   *                                          of input strings.</p>
   *
   * @return string|string[] <p>The result string is returned. If $str is an array then an array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $count = count($str);

      // one replacement per input string
      $replacement = is_array($replacement) === true
          ? array_slice($replacement, 0, $count)
          : array_pad(array($replacement), $count, $replacement);

      // one offset per input string, anything that is not a real integer falls back to 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $count);
        foreach ($offset as $index => $value) {
          if ((int)$value !== $value) {
            $offset[$index] = 0;
          }
        }
      } else {
        $offset = array_pad(array($offset), $count, $offset);
      }

      // one length per input string
      if ($length === null) {
        $length = array_fill(0, $count, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $count);
        foreach ($length as $index => $value) {
          if ($value === null) {
            $length[$index] = 0;
          } elseif ((int)$value !== $value) {
            $length[$index] = $count;
          }
        }
      } else {
        $length = array_pad(array($length), $count, $length);
      }

      // handle every string on its own via a recursive call
      return array_map(array(__CLASS__, 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single string can only take a single replacement
    if (is_array($replacement) === true) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // bytes and characters are the same thing in ASCII, so the native function is enough
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into single characters and splice on character level
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6860
6861
  /**
   * Strips a trailing $needle from $haystack (case-sensitive match).
   *
   * @param string $haystack <p>The string to strip the suffix from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix; the unchanged haystack if it does not end with
   *                $needle (or $needle is empty); an empty string for an empty haystack.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be stripped from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty needle or a non-matching suffix leaves the input untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $stripped = self::substr($haystack, 0, $keepLength);

    // "self::substr()" may report failure with false, which is normalized to an empty string
    return $stripped === false ? '' : (string)$stripped;
  }
6892
6893
  /**
   * Returns the string with the case of every non-whitespace character inverted.
   *
   * A character that is equal to its own upper-case form is lower-cased, every other character is
   * upper-cased.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // invert the case of one matched character
    $flipCase = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = self::strtoupper($char, $encoding);

      // already upper-case (or case-less) => try the lower-case form
      return $char === $upper ? self::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $flipCase, $str);
  }
6936
6937 21
  /**
   * Deprecated camelCase alias, all arguments are forwarded unchanged to "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "$unknown" argument.</p>
   * @param bool   $strict    <p>Passed on as the "$strict" argument.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6954
6955
  /**
   * Deprecated camelCase alias, the argument is forwarded unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6970
6971
  /**
   * Deprecated camelCase alias, the argument is forwarded unchanged to "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6986
6987 7
  /**
   * Deprecated camelCase alias, the argument is forwarded unchanged to "UTF8::to_utf8()".
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
7002 2
7003 2
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned as it is. Everything else is cleaned via "UTF8::clean()" and then
   * mapped character by character through lookup tables which are loaded on demand via
   * "self::getData()" (one table per "code point >> 8" bank) and cached for the rest of the request.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // lookup tables, indexed by bank ("code point >> 8"), shared between calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure: keep the input in that case,
        // so that the table based fallback below still has something to work with
        if ($transliterated !== false) {
          $str = (string)$transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // reset the code point for every character, otherwise the value of the previous
      // character would be reused for a lead byte that matches none of the ranges below
      $ord = null;

      // decode the code point, the lead byte tells how many bytes belong to the character
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 >= 252 && $ordC0 <= 253) {
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // no range matched (lead byte 128-191, 254 or 255) => not decodable
      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // the high bits select the table, the low byte selects the entry within the table
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the miss, so the lookup is not repeated for this bank
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      // keep for debugging (missing chars)
      /*
      echo "file: " . sprintf('x%02x', $bank) . "\n";
      echo "char: " . $c . "\n";
      echo "ord: " . $ord . "\n";
      echo "newchar: " . $newchar . "\n";
      echo "bank:" . $bank . "\n\n";
      */

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference, so that later writes to "$c" can not change the last array element
    unset($c);

    return implode('', $chars);
  }
7163 22
7164
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), the keys are kept.
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[] <p>The result of "UTF8::utf8_decode()" for a string (an empty string for empty
   *                         input), or an array of converted values for an array.</p>
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      // "array_map()" with a single array preserves the keys
      return array_map(array(__CLASS__, 'to_iso8859'), $str);
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
7193 21
7194 15
  /**
   * Alias, the argument is forwarded unchanged to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input string or an array of strings.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7207 5
7208 5
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>Byte sequences that look like 2, 3 or 4 byte UTF-8 characters are copied as they are.</li>
   * <li>Every other byte >= 0x80 is converted via "self::to_utf8_convert()", which assumes that the
   * original encoding is either WINDOWS-1252 or ISO-8859.</li>
   * <li>Unicode escape sequences ("\uXXXX", four hex digits) are decoded.</li>
   * <li>HTML entities are decoded, if requested via $decodeHtmlEntityToUtf8.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively)
    if (is_array($str) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // number of bytes: "strlen()" can not be trusted for that, if mbstring overloads it
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $out = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $byteCount; $i++) {
      $lead = $str[$i];

      if ($lead < "\xC0") {
        // 0x80-0xBF without a lead byte needs conversion, everything below 0x80 is kept
        $out .= ($lead & "\xC0") === "\x80" ? self::to_utf8_convert($lead) : $lead;

        continue;
      }

      // number of continuation bytes the lead byte asks for (0 => can not start a UTF-8 character)
      if ($lead <= "\xDF") {
        $trailing = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $trailing = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $trailing = 3; // looks like 4 bytes UTF8
      } else {
        $trailing = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the announced continuation bytes, bytes behind the end of the string count as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $trailing > 0;
      for ($j = 1; $j <= $trailing; $j++) {
        $next = $i + $j >= $byteCount ? "\x00" : $str[$i + $j];

        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
          break;
        }

        $sequence .= $next;
      }

      if ($looksLikeUtf8 === true) {
        // yeah, almost sure it's UTF8 already
        $out .= $sequence;
        $i += $trailing;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are handled on their own
        $out .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences
    $out = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html-entities, only on request
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
7325
7326
  /**
   * Converts one single byte into its UTF-8 representation.
   *
   * Bytes with an entry in "self::$WIN1252_TO_UTF8" are replaced by that entry, every other byte is
   * turned into a two byte sequence.
   *
   * @param string $int <p>A single byte, passed as a string (despite the name of the parameter).</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int)
  {
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return self::$WIN1252_TO_UTF8[$byteValue];
    }

    // two byte sequence: the lead byte carries the upper bits, the second byte the lower six bits
    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $trailByte = ($int & "\x3F") | "\x80";

    return $leadByte . $trailByte;
  }
7346
7347
  /**
   * Strip whitespace or other characters from beginning and end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for 7-Bit strings / chars, but the check for ASCII (7-Bit)
   * costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Characters to be stripped. Without (or with empty) $chars the
   *                      unicode categories "separator" (\pZ) and "other" (\pC) are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // custom chars: strip the left side first, then the right side
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7375
7376 7
  /**
   * Makes string's first char uppercase, the rest of the string is left as it is.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything behind the first character, false (= no such part) counts as empty string
    $rest = self::substr($str, 1, null, $encoding);
    if ($rest === false) {
      $rest = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $rest;
  }
7406 7
7407
  /**
   * Alias, all arguments are forwarded unchanged to "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7422
7423
  /**
   * Uppercase the first character of all words in the string.
   *
   * Pure ASCII input without $charlist and without $exceptions is handled by the native "ucwords()".
   * Otherwise the string is split via "UTF8::str_to_words()", every piece that is not listed in
   * $exceptions goes through "UTF8::ucfirst()" and the pieces are joined again without a separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // test for a really empty string: a truthiness check would also throw away the valid input "0"
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function knows neither a charlist nor exceptions
    // (compared against '', so that a charlist of "0" is not mistaken for "no charlist")
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty pieces only: a piece like "0" is falsy, but it must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7491
7492
  /**
   * Decode url-encoding and html-entities (repeatedly, if requested) & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a decoding pass no longer
   *                             changes the string.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" sequences into numeric html-entities, they are decoded in the loop below
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    // "ENT_HTML5" is only used, if the version check for PHP 5.4 succeeds
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    while (true) {
      $previous = $str;

      // one decoding pass: to UTF-8 -> html-entities -> url-encoding -> fix simple UTF-8 issues
      $decoded = self::to_utf8($previous);
      $decoded = self::html_entity_decode($decoded, $flags);
      $decoded = urldecode($decoded);
      $str = self::fix_simple_utf8($decoded);

      // stop after the first pass, or as soon as a pass changed nothing
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return (string)$str;
  }
7542
7543
  /**
   * Return an array that maps "urlencoded" Windows-1252 bytes to UTF-8 characters.
   *
   * <p>
   * The keys are the upper-case percent-encoded bytes "%20" - "%FF", the values are the
   * decoded characters. Some entries (e.g. "%81", "%8D", "%8F", "%90", "%9D") map to an empty string.
   * </p>
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>The lookup table: percent-encoded byte => decoded character.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        // 0x80 is the EURO SIGN in Windows-1252 (not a backtick, which is already "%60")
        '%80' => '€',
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7779
7780
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * <p>
   * Two-byte sequences whose code point is below 256 become a single byte; every other
   * multi-byte sequence is replaced by "?".
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the search / replace lists are built once and re-used by later calls
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length of the string, independent of the "mbstring.func_overload" setting
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    // the string is rewritten in place: $readPos walks the input bytes,
    // $writePos marks where the next decoded byte is stored
    $writePos = 0;
    for ($readPos = 0; $readPos < $byteCount; ++$readPos, ++$writePos) {
      $highNibble = $str[$readPos] & "\xF0";

      if ($highNibble === "\xC0" || $highNibble === "\xD0") {
        // two-byte sequence: 5 bits from the lead byte + 6 bits from the next byte
        $leadBits = ord($str[$readPos] & "\x1F") << 6;
        $tailBits = ord($str[++$readPos] & "\x3F");
        $codePoint = $leadBits | $tailBits;

        $str[$writePos] = $codePoint < 256 ? self::chr_and_parse_int($codePoint) : '?';
      } elseif ($highNibble === "\xE0") {
        // three-byte sequence: write "?" and skip the two following bytes
        $str[$writePos] = '?';
        $readPos += 2;
      } elseif ($highNibble === "\xF0") {
        // four-byte sequence: write "?" and skip the three following bytes
        $str[$writePos] = '?';
        $readPos += 3;
      } else {
        // any other byte is copied unchanged
        $str[$writePos] = $str[$readPos];
      }
    }

    // only the first $writePos bytes hold decoded output
    return (string)self::substr($str, 0, $writePos, '8BIT');
  }
7843 1
7844
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>
   * The string is converted with the native "utf8_encode()" function; if the result contains
   * a "\xC2" byte, the sequences listed in "self::$CP1252_TO_UTF8" are replaced afterwards.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string if the input is empty or the conversion failed.</p>
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte the replacement table is not applied
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // the search / replace lists are built once and re-used by later calls
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7880
7881
  /**
   * Fix utf8-win1252 chars; this is only an alias that forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7894
7895 8
  /**
   * Returns the table of all utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7911 8
7912
  /**
   * Limit the number of words in a string.
   *
   * <p>
   * If the string holds more than "$limit" words, the first "$limit" words are returned
   * (right-trimmed) followed by "$strAddOn"; otherwise the string is returned unchanged.
   * </p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
   *
   * @return string <p>An empty string if the input is empty or the limit is lower than 1.</p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $limit = (int)$limit;
    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, then 1 to $limit runs of "non-whitespace + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // the string was shortened only if something matched and the match is not the whole string
    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);
    if (!$wasShortened) {
      return $str;
    }

    return self::rtrim($matches[0]) . $strAddOn;
  }
7948
7949
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>
   * The string is split into characters via "self::split()" and mirrored into a single-byte
   * mask (" " for a space, "#" for an existing break, "?" for every other character).
   * The native "wordwrap()" runs on that mask, and the positions of "#" in its result
   * decide where "$break" is written into the output.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    $mask = '';
    $chars = array();
    $segments = explode($break, $str);
    $segmentCount = count($segments);

    /** @noinspection ForeachInvariantsInspection */
    for ($n = 0; $n < $segmentCount; ++$n) {

      // every segment after the first one was preceded by a break in the input
      if ($n > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$n];
      unset($segments[$n]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskPos = -1;
    $breakPos = -1;

    while (($breakPos = self::strpos($mask, '#', $breakPos + 1)) !== false) {

      // copy the characters in front of the break marker
      for (++$maskPos; $maskPos < $breakPos; ++$maskPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // a marker that replaced a break or a space consumes that character,
      // a marker that was inserted into a word consumes nothing
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
8016
8017
  /**
   * Returns the array of Unicode White Space characters stored in "self::$WHITESPACE".
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8026
8027
}
8028