Completed
Push — master ( faa8e4...410843 )
by Lars
19:19 queued 07:20
created

UTF8::rtrim()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 15
Code Lines 7

Duplication

Lines 15
Ratio 100 %

Code Coverage

Tests 7
CRAP Score 4

Importance

Changes 0
Metric Value
dl 15
loc 15
ccs 7
cts 7
cp 1
rs 9.2
c 0
b 0
f 0
cc 4
eloc 7
nc 3
nop 2
crap 4
1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Creating an instance runs the UTF-8 environment detection
   * (see UTF8::checkForSupport()).
   */
  public function __construct()
  {
    self::checkForSupport();
  }
820
821
  /**
   * Fetch a single character from a UTF-8 string by its position,
   * similar to what $str[1] does for single-byte strings.
   *
   * @param string $str <p>The UTF-8 input string.</p>
   * @param int    $pos <p>Position of the wanted character.</p>
   *
   * @return string <p>One (possibly multi-byte) character; an empty string
   *                for empty input or a negative position.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;

    // the position is only cast once we know there is something to read
    if ($input === '' || ($offset = (int)$pos) < 0) {
      return '';
    }

    return (string)self::substr($input, $offset, 1);
  }
845
846
  /**
   * Make sure the given string starts with the UTF-8 byte order mark.
   *
   * INFO: A string that already carries a BOM is handed back unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string, prefixed with the UTF-8 BOM if it was missing.</p>
   */
  public static function add_bom_to_string($str)
  {
    // anything other than an explicit "false" means: leave the input alone
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number, exactly like the previous
   * base_convert() based implementation did for short inputs: leading zero
   * bits are dropped and characters other than "0" / "1" are ignored.
   * Unlike base_convert(), the conversion is done nibble by nibble, so inputs
   * of arbitrary length are converted without losing precision.
   *
   * @param mixed $bin <p>String made of the characters 1|0.</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty input.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // keep binary digits only and strip insignificant leading zeros
    $bits = ltrim((string)preg_replace('/[^01]/', '', (string)$bin), '0');

    if ($bits === '') {
      // numeric value zero, same hex representation base_convert() produced
      return pack('H*', '0');
    }

    // left-pad to a multiple of four bits so every chunk is one hex digit
    $missingBits = (4 - (strlen($bits) % 4)) % 4;
    if ($missingBits > 0) {
      $bits = str_repeat('0', $missingBits) . $bits;
    }

    $hex = '';
    foreach (str_split($bits, 4) as $nibble) {
      $hex .= dechex(bindec($nibble));
    }

    return pack('H*', $hex);
  }
879
880
  /**
   * Get the byte order mark of UTF-8.
   *
   * INFO: the BOM values of other encodings (e.g. UTF-16 / UTF-32) are listed in UTF8::$BOM
   *
   * @return string <p>The three bytes of the UTF-8 BOM.</p>
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
891
892
  /**
   * Apply a callback to every character of a string.
   *
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback, forwarded unchanged.</p>
   * @param string       $str      <p>The string, forwarded unchanged.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
   * Detect which UTF-8 related features the current PHP environment offers
   * and remember the result in UTF8::$SUPPORT.
   *
   * INFO: There is no need to call this manually, it is triggered on demand,
   *       and every call after the first one returns immediately.
   */
  public static function checkForSupport()
  {
    // already detected: nothing to do
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // true only if the constant exists and the string-overload bit is set in the ini value
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    $canListTransliterators = (
        self::$SUPPORT['intl'] === true
        &&
        function_exists('transliterator_list_ids') === true
    );
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? transliterator_list_ids() : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per code point / encoding pair. If "\IntlChar" is
   * available it does the conversion; otherwise the UTF-8 byte sequence is
   * built manually, which requires an integer code point in the range
   * 0 - 0x10FFFF.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // array_key_exists() instead of isset(), so that cached failures (null) are found, too
    $cacheKey = $code_point . $encoding;
    if (array_key_exists($cacheKey, $CHAR_CACHE) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $str = \IntlChar::chr($code_point);

      // "\IntlChar::chr()" reports failure as null, do not feed that into the converter
      if ($str !== null && $encoding !== 'UTF-8') {
        $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
      }

      $CHAR_CACHE[$cacheKey] = $str;

      return $str;
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    if ((int)$code_point !== $code_point) {
      $CHAR_CACHE[$cacheKey] = null;

      return null;
    }

    // outside of the Unicode range the byte arithmetic below would produce invalid UTF-8
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      $CHAR_CACHE[$cacheKey] = null;

      return null;
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1033
1034 1
  /**
   * Cast the given value to an integer and return the single byte
   * that the native chr() produces for it.
   *
   * @param int $int <p>The byte value.</p>
   *
   * @return string <p>A one-byte string.</p>
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1043
1044
  /**
   * Runs a callback over every element that UTF8::split() produces for a string.
   *
   * @param string|array $callback <p>The callback function, invoked once per split element.</p>
   * @param string       $str      <p>UTF-8 string to run the callback on.</p>
   *
   * @return array <p>The values returned by the callback, as collected by array_map().</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1058
1059 4
  /**
   * Builds a list with the byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The byte length of every element returned by UTF8::split(); an empty array for an
   *               empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if (isset($str[0]) === false) {
      return array();
    }

    // measure each split element in bytes ("8BIT"), not in characters
    $byteLength = function ($chunk) {
      return self::strlen($chunk, '8BIT');
    };

    return array_map($byteLength, self::split($str));
  }
1086
1087 2
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are decoded (1 to 4); the payload bits of the
   * following bytes are appended, six bits at a time. A lead byte that matches none of
   * the 2-, 3- or 4-byte patterns is returned as it is.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value; 0 for an empty string. Continuation bytes that are
   *             missing from a truncated sequence count as zero bits.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // an empty string has no lead byte: do not read offset 0
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // a truncated sequence must not be read past the end of the string
      $byte = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($byte & ~0x80);
    }

    return $code;
  }
1126
1127 1
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character.</p>
   * @param string $pfix [optional] <p>The prefix handed to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if (isset($char[0]) === false) {
      return '';
    }

    // the literal NUL entity is looked up as an empty character
    $subject = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($subject), $pfix);
  }
1149
1150
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the character.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1163
1164
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * The line ending is placed between the chunks only; nothing is appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string.</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1177
1178
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order: drop bytes that are not part of a well-formed sequence, remove the
   * diamond question mark, remove invisible characters, then apply the optional normalizations.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings
    // (presumably the reason for the bounded {1,100} quantifier below)

    // Group 1 captures runs of well-formed sequences; a stray byte matches group 2 or 3,
    // so replacing every match with "$1" keeps the runs and drops the stray bytes.
    $wellFormedPattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $cleaned = preg_replace($wellFormedPattern, '$1', $str);

    $cleaned = self::remove_invisible_characters(
        self::replace_diamond_question_mark($cleaned, '')
    );

    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1226
1227
  /**
   * Cleans up a string so that only printable UTF-8 chars remain + fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The repaired and cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if (isset($str[0]) === false) {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $repaired = self::fix_simple_utf8($str);

    // then, in one pass:
    // - remove all non UTF-8 symbols
    // - remove the diamond question mark (�)
    // - remove invisible characters (e.g. "\0")
    // - remove the BOM
    // - normalize whitespace chars (but keep non-breaking-spaces)
    $cleaned = self::clean($repaired, true, true, false, true);

    return (string)$cleaned;
  }
1254 7
1255 7
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If true, the code points are returned in U+xxxx format;
   *                                    by default they are returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split first; an array is mapped as given
    if (is_string($arg) === true) {
      $arg = self::split($arg);
    }

    $toCodePoint = array('\\voku\\helper\\UTF8', 'ord');
    $codePoints = array_map($toCodePoint, $arg);

    if (!$u_style) {
      return $codePoints;
    }

    $toHex = array('\\voku\\helper\\UTF8', 'int_to_hex');

    return array_map($toHex, $codePoints);
  }
1292
1293
  /**
   * Counts how often each character is used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array with the characters as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1306
1307
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") which is then decoded
   * via UTF8::html_entity_decode().
   *
   * @param mixed $int <p>The decimal code of the character.</p>
   *
   * @return string <p>The decoded entity.</p>
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only referenced when the PHP version check passes
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    return self::html_entity_decode('&#' . $int . ';', $flags);
  }
1324 5
1325
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise the string is only converted when the detected encoding
   *                         differs from the requested one</p>
   *
   * @return string <p>The converted string, or the input string when nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    if (isset($str[0]) === false || isset($encoding[0]) === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // nothing to convert when the detection failed ...
    if ($encodingDetected === false) {
      return $str;
    }

    // ... or when, without forcing, the string already has the requested encoding
    $forced = ($force === true);
    if ($forced === false && $encodingDetected === $encoding) {
      return $str;
    }

    // dedicated converter for UTF-8 targets
    if (
        $encoding === 'UTF-8'
        &&
        (
            $forced
            || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true)
        )
    ) {
      return self::to_utf8($str);
    }

    // dedicated converter for ISO-8859-1 targets
    if (
        $encoding === 'ISO-8859-1'
        &&
        (
            $forced
            || in_array($encodingDetected, array('ISO-8859-1', 'UTF-8'), true)
        )
    ) {
      return self::to_iso8859($str);
    }

    // every other target goes through mb_convert_encoding(): warn if mbstring is flagged as missing
    $needsMbstring = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // a falsy conversion result falls back to the unconverted input
    return $strEncoded ? $strEncoded : $str;
  }
1416
1417
  /**
   * Reads entire file into a string, optionally converting the content to clean UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The value is passed through
   *                                     filter_var() with FILTER_SANITIZE_STRING before it is used.
   *                                     </p>
   * @param int|false     $flags         [optional] <p>
   *                                     Forwarded as second argument of the native file_get_contents();
   *                                     every falsy value is forwarded as false.
   *                                     Prior to PHP 6, this parameter is called use_include_path and is
   *                                     a bool. As of PHP 5 the FILE_USE_INCLUDE_PATH can be used to
   *                                     trigger include path search.
   *                                     </p>
   *                                     <p>
   *                                     The value of flags can be any combination of the following flags
   *                                     (with some restrictions), joined with the binary OR (|) operator:
   *                                     </p>
   *                                     <ul>
   *                                     <li>FILE_USE_INCLUDE_PATH: Search for filename in the include
   *                                     directory. See include_path for more information.</li>
   *                                     <li>FILE_TEXT: As of PHP 6, the default encoding of the read data
   *                                     is UTF-8. You can specify a different encoding by creating a
   *                                     custom context or by changing the default using
   *                                     stream_default_encoding. This flag cannot be used with
   *                                     FILE_BINARY.</li>
   *                                     <li>FILE_BINARY: With this flag, the file is read in binary mode.
   *                                     This is the default setting and cannot be used with
   *                                     FILE_TEXT.</li>
   *                                     </ul>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with stream_context_create.
   *                                     If null and $timeout is non-zero, a context with an "http"
   *                                     timeout is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts; null is treated as 0.
   *                                     </p>
   * @param int|null      $maxLength     [optional] <p>
   *                                     Maximum length of data read, only used when it is an int.
   *                                     The default is to read until end of file is reached.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    // NOTE(review): FILTER_SANITIZE_STRING strips tags / encodes quotes and is deprecated
    // as of PHP 8.1 - confirm that this is intended for file names.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // only build a timeout context when the caller did not bring one
    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    $flags = $flags ? $flags : false;
    $offset = ($offset === null) ? 0 : $offset;

    // the length argument is only passed along when it is a real int
    if (is_int($maxLength) === true) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxLength);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $data;
    }

    $converted = self::encode('UTF-8', $data, false);

    return self::cleanup($converted);
  }
1542
1543 9
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // a failed read yields false, which must not be handed on as if it were file content
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1554 2
1555 2
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are filtered recursively, strings are normalized, every other
   * type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form handed to \Normalizer (default: 4, NFC).</p>
   * @param string $leading_combining  <p>Prefix for strings that would otherwise start with a combining
   *                                   mark; an empty string disables the prefix.</p>
   *
   * @return mixed <p>The filtered value.</p>
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // non-empty marker: lets the isset($n[0]) check below pass for already normalized input
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      // an empty / failed normalization result falls back to a forced UTF-8 encode
      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1618
1619
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it.
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded when all four arguments are passed.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * The value is passed through UTF8::filter() before it is returned.
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if the caller really passed it
    if (func_num_args() >= 4) {
      $var = filter_input($type, $variable_name, $filter, $options);
    } else {
      $var = filter_input($type, $variable_name, $filter);
    }

    return self::filter($var);
  }
1659
1660
  /**
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails. The result is passed through UTF8::filter() before it is returned.
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // $definition and $add_empty are only forwarded when at least two arguments were passed
    if (func_num_args() >= 2) {
      $a = filter_input_array($type, $definition, $add_empty);
    } else {
      $a = filter_input_array($type);
    }

    return self::filter($a);
  }
1707
1708
  /**
1709
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1710
   *
1711
   * Filters a variable with a specified filter
1712
   *
1713
   * @link  http://php.net/manual/en/function.filter-var.php
1714
   *
1715
   * @param mixed $variable <p>
1716
   *                        Value to filter.
1717
   *                        </p>
1718
   * @param int   $filter   [optional] <p>
1719
   *                        The ID of the filter to apply. The
1720
   *                        manual page lists the available filters.
1721
   *                        </p>
1722
   * @param mixed $options  [optional] <p>
1723
   *                        Associative array of options or bitwise disjunction of flags. If filter
1724
   *                        accepts options, flags can be provided in "flags" field of array. For
1725
   *                        the "callback" filter, callable type should be passed. The
1726
   *                        callback must accept one argument, the value to be filtered, and return
1727
   *                        the value after filtering/sanitizing it.
1728
   *                        </p>
1729
   *                        <p>
1730
   *                        <code>
1731
   *                        // for filters that accept options, use this format
1732
   *                        $options = array(
1733
   *                        'options' => array(
1734
   *                        'default' => 3, // value to return if the filter fails
1735
   *                        // other options here
1736
   *                        'min_range' => 0
1737
   *                        ),
1738
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1739
   *                        );
1740
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1741
   *                        // for filter that only accept flags, you can pass them directly
1742
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1743
   *                        // for filter that only accept flags, you can also pass as an array
1744
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1745
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1746 1
   *                        // callback validate filter
1747
   *                        function foo($value)
1748 1
   *                        {
1749 1
   *                        // Expected format: Surname, GivenNames
1750 1
   *                        if (strpos($value, ", ") === false) return false;
1751 1
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1752
   *                        $empty = (empty($surname) || empty($givennames));
1753
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1754 1
   *                        if ($empty || $notstrings) {
1755
   *                        return false;
1756
   *                        } else {
1757
   *                        return $value;
1758
   *                        }
1759
   *                        }
1760
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1761
   *                        </code>
1762
   *                        </p>
1763
   *
1764
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1765
   * @since 5.2.0
1766
   */
1767 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Only forward $options when the caller really passed a third argument;
    // otherwise the native function is called without it.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // The native result is post-processed by self::filter() before it is returned.
    return self::filter($filtered);
  }
1777
1778
  /**
1779
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1780
   *
1781
   * Gets multiple variables and optionally filters them
1782
   *
1783
   * @link  http://php.net/manual/en/function.filter-var-array.php
1784
   *
1785
   * @param array $data       <p>
1786
   *                          An array with string keys containing the data to filter.
1787
   *                          </p>
1788
   * @param mixed $definition [optional] <p>
1789
   *                          An array defining the arguments. A valid key is a string
1790
   *                          containing a variable name and a valid value is either a
1791 1
   *                          filter type, or an
1792
   *                          array optionally specifying the filter, flags and options.
1793 1
   *                          If the value is an array, valid keys are filter
1794 1
   *                          which specifies the filter type,
1795 1
   *                          flags which specifies any flags that apply to the
1796 1
   *                          filter, and options which specifies any options that
1797
   *                          apply to the filter. See the example below for a better understanding.
1798
   *                          </p>
1799 1
   *                          <p>
1800
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1801
   *                          input array are filtered by this filter.
1802
   *                          </p>
1803
   * @param bool  $add_empty  [optional] <p>
1804
   *                          Add missing keys as <b>NULL</b> to the return value.
1805
   *                          </p>
1806
   *
1807
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1808
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1809
   * the variable is not set.
1810 1
   * @since 5.2.0
1811
   */
1812 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a definition;
    // otherwise all three values are forwarded as given.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // The native result is post-processed by self::filter() before it is returned.
    return self::filter($filtered);
  }
1822
1823
  /**
1824
   * Check if the number of unicode characters is not more than the specified integer.
1825
   *
1826
   * @param string $str      The original string to be checked.
1827
   * @param int    $box_size The size in number of chars to be checked against string.
1828 26
   *
1829
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1830
   */
1831 26
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared inclusively against the box size.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1835
1836
  /**
1837 26
   * Try to fix simple broken UTF-8 strings.
1838 26
   *
1839
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1840 26
   *
1841 1
   * If you received a UTF-8 string that was converted from Windows-1252 as if it were ISO-8859-1
1842 1
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1843 1
   * See: http://en.wikipedia.org/wiki/Windows-1252
1844
   *
1845 26
   * @param string $str <p>The input string</p>
1846
   *
1847
   * @return string
1848
   */
1849 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    // Nothing to repair in an empty string.
    if ($str === '') {
      return '';
    }

    // Split the self::$BROKEN_UTF8_FIX map into search/replace lists once
    // and keep them for every later call.
    static $REPLACE_MAP_CACHE = null;

    if ($REPLACE_MAP_CACHE === null) {
      $REPLACE_MAP_CACHE = array(
          'search'  => array_keys(self::$BROKEN_UTF8_FIX),
          'replace' => array_values(self::$BROKEN_UTF8_FIX),
      );
    }

    return str_replace($REPLACE_MAP_CACHE['search'], $REPLACE_MAP_CACHE['replace'], $str);
  }
1868
1869
  /**
1870 1
   * Fix a double (or multiple) encoded UTF8 string.
1871 1
   *
1872 1
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1873 1
   *
1874 1
   * @return string|string[] <p>Will return the fixed input-"array" or
1875 1
   *                         the fixed input-"string".</p>
1876 1
   */
1877
  public static function fix_utf8($str)
  {
    // Arrays are fixed element by element (recursively), keeping their keys.
    if (is_array($str) === true) {
      foreach (array_keys($str) as $key) {
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    // Repeat the decode -> re-encode round trip until it no longer changes the value.
    $current = $str;
    $previous = '';
    while ($previous !== $current) {
      $previous = $current;
      $current = self::to_utf8(self::utf8_decode($previous));
    }

    return $current;
  }
1901
1902
  /**
1903
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1904
   *
1905
   * @param string $char
1906
   *
1907
   * @return string <p>'RTL' or 'LTR'</p>
1908
   */
1909
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // Direction codes from the "IntlChar"-class, grouped by reading direction.
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // Any other code falls through to the table lookup below.
    }

    $code = static::chr_to_decimal($char);

    // Everything outside the overall span covered by the tables is left-to-right.
    if (!($code >= 0x5be && $code <= 0x10b7f)) {
      return 'LTR';
    }

    if ($code <= 0x85e) {

      // lower block: 0x5be - 0x85e
      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif ($code === 0x200f) {

      return 'RTL';

    } elseif ($code >= 0xfb1d) {

      // upper block: 0xfb1d - 0x10b7f
      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two blocks (and not 0x200f)
      return 'LTR';

    }

    // Single code points are matched strictly, ranges inclusively on both ends.
    if (in_array($code, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($code >= $range[0] && $code <= $range[1]) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2023
2024
  /**
2025
   * Get data from "/data/*.php".
2026
   *
2027
   * @param string $file
2028
   *
2029
   * @return bool|string|array|int <p>Will return false on error.</p>
2030 7
   */
2031
  private static function getData($file)
  {
    // Resolve the name to a PHP file in the "data" directory next to this class.
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // The data file's own return value is handed back to the caller.
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2041
2042
  /**
2043
   * Check for php-support.
2044 5
   *
2045
   * @param string|null $key
2046
   *
2047
   * @return mixed <p>Return the full support-"array", if $key === null<br>
2048
   *               return bool-value, if $key is used and available<br>
2049
   *               otherwise return null</p>
2050
   */
2051
  public static function getSupportInfo($key = null)
  {
    // Run the support detection once, on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key: hand back the complete support-"array".
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) keys yield null.
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2067
2068
  /**
2069
   * alias for "UTF8::string_has_bom()"
2070 2
   *
2071
   * @see UTF8::string_has_bom()
2072 2
   *
2073
   * @param string $str
2074
   *
2075
   * @return bool
2076
   *
2077
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2078
   */
2079
  public static function hasBom($str)
  {
    // Deprecated alias: simply delegates to string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2083
2084 1
  /**
2085
   * Converts a hexadecimal-value into an UTF-8 character.
2086 1
   *
2087
   * @param string $hexdec <p>The hexadecimal value.</p>
2088 1
   *
2089 1
   * @return string|false <p>One single UTF-8 character.</p>
2090
   */
2091
  public static function hex_to_chr($hexdec)
  {
    // Hexadecimal string -> decimal number -> character (via decimal_to_chr()).
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2095
2096 1
  /**
2097
   * Converts hexadecimal U+xxxx code point representation to integer.
2098
   *
2099
   * INFO: opposite to UTF8::int_to_hex()
2100
   *
2101
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2102
   *
2103
   * @return int|false <p>The code point, or false on failure.</p>
2104
   */
2105
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    if ($hexDec === '') {
      return false;
    }

    // Accepted forms: "\uXXXX", "U+XXXX" or the bare digits, 4 to 6 of them (case-insensitive).
    // Only real hexadecimal digits are allowed: with a wider class such as [a-z0-9],
    // input like "U+zzzz" would match and intval() would silently turn it into 0
    // instead of the documented false.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2119
2120
  /**
2121
   * alias for "UTF8::html_entity_decode()"
2122
   *
2123
   * @see UTF8::html_entity_decode()
2124
   *
2125
   * @param string $str
2126 2
   * @param int    $flags
2127
   * @param string $encoding
2128
   *
2129 2
   * @return string
2130
   */
2131 2
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Alias: all arguments are passed through unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2135 2
2136 1
  /**
2137 1
   * Converts a UTF-8 string to a series of HTML numbered entities.
2138
   *
2139
   * INFO: opposite to UTF8::html_decode()
2140 2
   *
2141
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2142 2
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2143 2
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2144 1
   *
2145 1
   * @return string <p>HTML numbered entities.</p>
2146
   */
2147 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // When ASCII is kept, encoding starts at the first non-ASCII code point.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap is a list of 4-tuples: start code, end code, offset, mask.
      // It must hold exactly a multiple of four entries; PHP 8 throws a
      // ValueError for any other size, so no trailing fifth element here.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode every character on its own.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2185
2186
  /**
2187
   * UTF-8 version of html_entity_decode()
2188
   *
2189
   * The reason we are not using html_entity_decode() by itself is because
2190
   * while it is not technically correct to leave out the semicolon
2191
   * at the end of an entity most browsers will still interpret the entity
2192
   * correctly. html_entity_decode() does not convert entities without
2193
   * semicolons, so we are left with our own little solution here. Bummer.
2194
   *
2195
   * Convert all HTML entities to their applicable characters
2196
   *
2197
   * INFO: opposite to UTF8::html_encode()
2198
   *
2199
   * @link http://php.net/manual/en/function.html-entity-decode.php
2200
   *
2201
   * @param string $str      <p>
2202
   *                         The input string.
2203
   *                         </p>
2204
   * @param int    $flags    [optional] <p>
2205
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2206
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2207
   *                         <table>
2208
   *                         Available <i>flags</i> constants
2209
   *                         <tr valign="top">
2210
   *                         <td>Constant Name</td>
2211
   *                         <td>Description</td>
2212
   *                         </tr>
2213
   *                         <tr valign="top">
2214
   *                         <td><b>ENT_COMPAT</b></td>
2215
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2216
   *                         </tr>
2217
   *                         <tr valign="top">
2218
   *                         <td><b>ENT_QUOTES</b></td>
2219
   *                         <td>Will convert both double and single quotes.</td>
2220
   *                         </tr>
2221
   *                         <tr valign="top">
2222
   *                         <td><b>ENT_NOQUOTES</b></td>
2223
   *                         <td>Will leave both double and single quotes unconverted.</td>
2224
   *                         </tr>
2225
   *                         <tr valign="top">
2226
   *                         <td><b>ENT_HTML401</b></td>
2227
   *                         <td>
2228
   *                         Handle code as HTML 4.01.
2229
   *                         </td>
2230
   *                         </tr>
2231
   *                         <tr valign="top">
2232
   *                         <td><b>ENT_XML1</b></td>
2233
   *                         <td>
2234 16
   *                         Handle code as XML 1.
2235
   *                         </td>
2236
   *                         </tr>
2237 16
   *                         <tr valign="top">
2238
   *                         <td><b>ENT_XHTML</b></td>
2239 16
   *                         <td>
2240 5
   *                         Handle code as XHTML.
2241
   *                         </td>
2242
   *                         </tr>
2243 16
   *                         <tr valign="top">
2244 9
   *                         <td><b>ENT_HTML5</b></td>
2245
   *                         <td>
2246
   *                         Handle code as HTML 5.
2247
   *                         </td>
2248 15
   *                         </tr>
2249 15
   *                         </table>
2250
   *                         </p>
2251 15
   * @param string $encoding [optional] <p>Encoding to use.</p>
2252 15
   *
2253 9
   * @return string <p>The decoded string.</p>
2254 9
   */
2255 15
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // fewer than 4 bytes: returned untouched (examples: &; || &x;)
    if (!isset($str[3])) {
      return $str;
    }

    // Quick exit: without "&", or with neither "&#" nor ";", the input is returned as is.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252') {
      // Make sure the support flags have been detected before reading them,
      // as the other methods that consult self::$SUPPORT do.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Decode repeatedly until a pass no longer changes the string
    // (this also resolves multiply-encoded entities).
    do {
      $str_compare = $str;

      // decimal entities that end with a semicolon; quotes stay encoded in this step
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities first)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2330
2331
  /**
2332
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2333
   *
2334
   * @link http://php.net/manual/en/function.htmlentities.php
2335
   *
2336
   * @param string $str           <p>
2337
   *                              The input string.
2338
   *                              </p>
2339
   * @param int    $flags         [optional] <p>
2340
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2341
   *                              invalid code unit sequences and the used document type. The default is
2342
   *                              ENT_COMPAT | ENT_HTML401.
2343
   *                              <table>
2344
   *                              Available <i>flags</i> constants
2345
   *                              <tr valign="top">
2346
   *                              <td>Constant Name</td>
2347
   *                              <td>Description</td>
2348
   *                              </tr>
2349
   *                              <tr valign="top">
2350
   *                              <td><b>ENT_COMPAT</b></td>
2351
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2352
   *                              </tr>
2353
   *                              <tr valign="top">
2354
   *                              <td><b>ENT_QUOTES</b></td>
2355
   *                              <td>Will convert both double and single quotes.</td>
2356
   *                              </tr>
2357
   *                              <tr valign="top">
2358
   *                              <td><b>ENT_NOQUOTES</b></td>
2359
   *                              <td>Will leave both double and single quotes unconverted.</td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_IGNORE</b></td>
2363
   *                              <td>
2364
   *                              Silently discard invalid code unit sequences instead of returning
2365
   *                              an empty string. Using this flag is discouraged as it
2366
   *                              may have security implications.
2367
   *                              </td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2371
   *                              <td>
2372
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2373
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2374
   *                              </td>
2375
   *                              </tr>
2376
   *                              <tr valign="top">
2377
   *                              <td><b>ENT_DISALLOWED</b></td>
2378
   *                              <td>
2379
   *                              Replace invalid code points for the given document type with a
2380
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2381
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2382
   *                              instance, to ensure the well-formedness of XML documents with
2383
   *                              embedded external content.
2384
   *                              </td>
2385
   *                              </tr>
2386
   *                              <tr valign="top">
2387
   *                              <td><b>ENT_HTML401</b></td>
2388
   *                              <td>
2389
   *                              Handle code as HTML 4.01.
2390
   *                              </td>
2391
   *                              </tr>
2392
   *                              <tr valign="top">
2393
   *                              <td><b>ENT_XML1</b></td>
2394
   *                              <td>
2395
   *                              Handle code as XML 1.
2396
   *                              </td>
2397
   *                              </tr>
2398
   *                              <tr valign="top">
2399
   *                              <td><b>ENT_XHTML</b></td>
2400
   *                              <td>
2401
   *                              Handle code as XHTML.
2402
   *                              </td>
2403 2
   *                              </tr>
2404
   *                              <tr valign="top">
2405 2
   *                              <td><b>ENT_HTML5</b></td>
2406 1
   *                              <td>
2407 1
   *                              Handle code as HTML 5.
2408
   *                              </td>
2409 2
   *                              </tr>
2410
   *                              </table>
2411
   *                              </p>
2412
   * @param string $encoding      [optional] <p>
2413
   *                              Like <b>htmlspecialchars</b>,
2414
   *                              <b>htmlentities</b> takes an optional third argument
2415
   *                              <i>encoding</i> which defines encoding used in
2416
   *                              conversion.
2417
   *                              Although this argument is technically optional, you are highly
2418
   *                              encouraged to specify the correct value for your code.
2419 2
   *                              </p>
2420
   * @param bool   $double_encode [optional] <p>
2421 2
   *                              When <i>double_encode</i> is turned off PHP will not
2422 1
   *                              encode existing html entities. The default is to convert everything.
2423
   *                              </p>
2424
   *
2425 2
   *
2426 2
   * @return string the encoded string.
2427 2
   * </p>
2428 2
   * <p>
2429 2
   * If the input <i>string</i> contains an invalid code unit
2430 1
   * sequence within the given <i>encoding</i> an empty string
2431
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2432 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2433 1
   */
2434 1
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native htmlentities() leaves backslashes alone, so they are
     * additionally replaced by their numeric html entity here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra pass below is only done for UTF-8.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes that is still
    // present and pair it with its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($encoded) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($encoded, $position);
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $encoded);
  }
2472
2473
  /**
2474
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2475
   *
2476
   * INFO: Take a look at "UTF8::htmlentities()"
2477
   *
2478
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2479
   *
2480
   * @param string $str           <p>
2481
   *                              The string being converted.
2482
   *                              </p>
2483
   * @param int    $flags         [optional] <p>
2484
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2485
   *                              invalid code unit sequences and the used document type. The default is
2486
   *                              ENT_COMPAT | ENT_HTML401.
2487
   *                              <table>
2488
   *                              Available <i>flags</i> constants
2489
   *                              <tr valign="top">
2490
   *                              <td>Constant Name</td>
2491
   *                              <td>Description</td>
2492
   *                              </tr>
2493
   *                              <tr valign="top">
2494
   *                              <td><b>ENT_COMPAT</b></td>
2495
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2496
   *                              </tr>
2497
   *                              <tr valign="top">
2498
   *                              <td><b>ENT_QUOTES</b></td>
2499
   *                              <td>Will convert both double and single quotes.</td>
2500
   *                              </tr>
2501
   *                              <tr valign="top">
2502
   *                              <td><b>ENT_NOQUOTES</b></td>
2503
   *                              <td>Will leave both double and single quotes unconverted.</td>
2504
   *                              </tr>
2505
   *                              <tr valign="top">
2506
   *                              <td><b>ENT_IGNORE</b></td>
2507
   *                              <td>
2508
   *                              Silently discard invalid code unit sequences instead of returning
2509
   *                              an empty string. Using this flag is discouraged as it
2510
   *                              may have security implications.
2511
   *                              </td>
2512
   *                              </tr>
2513
   *                              <tr valign="top">
2514
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2515
   *                              <td>
2516
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2517
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2518
   *                              </td>
2519
   *                              </tr>
2520
   *                              <tr valign="top">
2521
   *                              <td><b>ENT_DISALLOWED</b></td>
2522
   *                              <td>
2523
   *                              Replace invalid code points for the given document type with a
2524
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2525
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2526
   *                              instance, to ensure the well-formedness of XML documents with
2527
   *                              embedded external content.
2528
   *                              </td>
2529
   *                              </tr>
2530
   *                              <tr valign="top">
2531
   *                              <td><b>ENT_HTML401</b></td>
2532
   *                              <td>
2533
   *                              Handle code as HTML 4.01.
2534
   *                              </td>
2535
   *                              </tr>
2536
   *                              <tr valign="top">
2537
   *                              <td><b>ENT_XML1</b></td>
2538
   *                              <td>
2539
   *                              Handle code as XML 1.
2540
   *                              </td>
2541
   *                              </tr>
2542
   *                              <tr valign="top">
2543
   *                              <td><b>ENT_XHTML</b></td>
2544
   *                              <td>
2545
   *                              Handle code as XHTML.
2546
   *                              </td>
2547
   *                              </tr>
2548
   *                              <tr valign="top">
2549
   *                              <td><b>ENT_HTML5</b></td>
2550
   *                              <td>
2551 1
   *                              Handle code as HTML 5.
2552
   *                              </td>
2553 1
   *                              </tr>
2554 1
   *                              </table>
2555 1
   *                              </p>
2556
   * @param string $encoding      [optional] <p>
2557 1
   *                              Defines encoding used in conversion.
2558
   *                              </p>
2559
   *                              <p>
2560
   *                              For the purposes of this function, the encodings
2561
   *                              ISO-8859-1, ISO-8859-15,
2562
   *                              UTF-8, cp866,
2563
   *                              cp1251, cp1252, and
2564
   *                              KOI8-R are effectively equivalent, provided the
2565 1
   *                              <i>string</i> itself is valid for the encoding, as
2566
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2567 1
   *                              the same positions in all of these encodings.
2568
   *                              </p>
2569
   * @param bool   $double_encode [optional] <p>
2570
   *                              When <i>double_encode</i> is turned off PHP will not
2571 1
   *                              encode existing html entities, the default is to convert everything.
2572
   *                              </p>
2573 1
   *
2574 1
   * @return string The converted string.
2575 1
   * </p>
2576 1
   * <p>
2577
   * If the input <i>string</i> contains an invalid code unit
2578 1
   * sequence within the given <i>encoding</i> an empty string
2579
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2580
   * <b>ENT_SUBSTITUTE</b> flags are set.
2581
   */
2582
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The literal 'UTF-8' is used as-is; every other label is first passed
    // through normalize_encoding() (called with 'UTF-8' as its second argument).
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    // Delegate the actual conversion to PHP's native htmlspecialchars().
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2590 2
2591
  /**
   * Checks whether iconv is available on the server.
   *
   * Side effect: when the extension is loaded and Bootup::is_php('5.6') reports
   * false, the iconv "input_encoding", "output_encoding" and "internal_encoding"
   * settings are switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $return = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only touch the iconv settings when the extension really exists: without
    // it iconv_set_encoding() is undefined and calling it would be a fatal error.
    if ($return === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $return;
  }
2611
2612 3
  /**
   * Alias for "UTF8::decimal_to_chr()": the argument is forwarded unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int <p>Value handed over to UTF8::decimal_to_chr().</p>
   *
   * @return string
   */
  public static function int_to_chr($int)
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2625
2626 1
  /**
   * Formats an integer as a hexadecimal code point string such as "U+0041".
   *
   * The hexadecimal digits come from dechex() and are left-padded with zeros
   * to a minimum width of four; longer values are kept as they are.
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Text placed in front of the hex digits.</p>
   *
   * @return string <p>The code point, or an empty string when $int is not of type integer.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Guard clause: only genuine integers are converted.
    if ((int)$int !== $int) {
      return '';
    }

    $digits = str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2648
2649
  /**
   * Checks whether intl-char is available on the server.
   *
   * Both conditions must hold: Bootup::is_php('7.0') reports true and the
   * class "IntlChar" exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    // The class lookup is skipped entirely when the version check fails.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2662
2663
  /**
   * Checks whether the "intl" extension is loaded.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    // extension_loaded() already yields a boolean.
    return extension_loaded('intl');
  }
2672
2673
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_ascii()".
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return boolean <p>Whatever UTF8::is_ascii() returns.</p>
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii($str)
  {
    $result = self::is_ascii($str);

    return $result;
  }
2688
2689
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_base64()".
   *
   * @see UTF8::is_base64()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever UTF8::is_base64() returns.</p>
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str)
  {
    $result = self::is_base64($str);

    return $result;
  }
2704
2705
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_binary()".
   *
   * @see UTF8::is_binary()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever UTF8::is_binary() returns.</p>
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str)
  {
    $result = self::is_binary($str);

    return $result;
  }
2720
2721
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_bom()".
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr <p>Passed through unchanged.</p>
   *
   * @return boolean <p>Whatever UTF8::is_bom() returns.</p>
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom($utf8_chr)
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2736
2737
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_html()".
   *
   * @see UTF8::is_html()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return boolean <p>Whatever UTF8::is_html() returns.</p>
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml($str)
  {
    $result = self::is_html($str);

    return $result;
  }
2752
2753
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_json()".
   *
   * @see UTF8::is_json()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever UTF8::is_json() returns.</p>
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson($str)
  {
    $result = self::is_json($str);

    return $result;
  }
2768
2769
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf16()".
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return int|false <p><strong>false</strong> if it is not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    $result = self::is_utf16($str);

    return $result;
  }
2784
2785
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf32()".
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str <p>Passed through unchanged.</p>
   *
   * @return int|false <p><strong>false</strong> if it is not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    $result = self::is_utf32($str);

    return $result;
  }
2800
2801 53
  /**
   * Deprecated camelCase alias: forwards to "UTF8::is_utf8()".
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str    <p>Passed through unchanged.</p>
   * @param bool   $strict <p>Passed through unchanged.</p>
   *
   * @return bool <p>Whatever UTF8::is_utf8() returns.</p>
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false)
  {
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2817 1
2818
  /**
   * Checks if a string is 7 bit ASCII.
   *
   * The input is cast to string. It passes when it is empty or when every byte
   * is one of 0x09, 0x0A, 0x0D, 0x10, 0x13 or lies in the range 0x20-0x7E.
   *
   * NOTE(review): 0x10 and 0x13 in the pattern look like decimal 10 / 13
   * (LF / CR) written as hex - confirm whether DLE and DC3 are meant to pass.
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII<br>
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // Nothing to reject in an empty string.
    if ($str === '') {
      return true;
    }

    // The string is ASCII unless at least one byte outside the allowed set is found.
    return preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str) !== 1;
  }
2838 16
2839
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * The input is cast to string, decoded with base64_decode() in strict mode
   * and re-encoded; it only counts as base64 when the re-encoded text is
   * identical to the input. An empty input, or input that decodes to an empty
   * payload, is reported as not base64.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return false;
    }

    $decoded = base64_decode($str, true);

    // Compare explicitly against false / '' instead of relying on truthiness:
    // a decoded payload of "0" (input "MA==") is falsy in PHP but perfectly valid.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2861
2862
  /**
   * Heuristic check whether the input is binary.
   *
   * The input is cast to string and counts as binary when it
   * - consists solely of the characters "0" and "1", or
   * - contains at least one NUL byte.
   *
   * An empty input is never binary.
   *
   * @param mixed $input
   *
   * @return bool
   */
  public static function is_binary($input)
  {
    $input = (string)$input;

    if (!isset($input[0])) {
      return false;
    }

    // textual bit-string, e.g. "01000001"
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // A single NUL byte is enough. The former "more than 30% NUL bytes" test
    // could never change the outcome, because any string passing it also
    // passes this check.
    return substr_count($input, "\x00") > 0;
  }
2892
2893 1
  /**
   * Check if the file is binary.
   *
   * Reads at most the first 512 bytes of the file and passes them to
   * UTF8::is_binary(). When the file cannot be opened or read, an empty
   * string is passed instead.
   *
   * @param string $file <p>Path handed to fopen().</p>
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $fp = false;
    $block = '';

    try {
      // fopen() reports failure by returning false (plus a warning), it does
      // not throw - so the handle has to be checked before it is used.
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        $chunk = fread($fp, 512);
        if (is_string($chunk)) {
          $block = $chunk;
        }
      }
    } catch (\Exception $e) {
      // Reached only when an error handler turns warnings into exceptions.
      $block = '';
    }

    // Release the handle on every path, including after a caught exception.
    if (is_resource($fp)) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2912
2913 1
  /**
   * Checks if the given string is equal to any "Byte Order Mark".
   *
   * The value is compared strictly (type and content) against every key of
   * the self::$BOM table.
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // The marks are the keys of the table; the values are not needed here.
    $knownMarks = array_keys(self::$BOM);

    return in_array($str, $knownMarks, true);
  }
2932
2933
  /**
   * Check if the string contains any html-tags <lall>.
   *
   * The input is cast to string and searched for the first thing that looks
   * like an opening, closing or self-closing tag (optionally with attributes).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return boolean <p><strong>true</strong> if a tag was found, <strong>false</strong> otherwise (also for an empty string).</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // One hit is sufficient; the matched text itself is not needed.
    return preg_match($tagPattern, $str) === 1;
  }
2959
2960
  /**
   * Try to check if "$str" is an json-string.
   *
   * The input is cast to string and decoded via UTF8::json_decode(). It only
   * counts as JSON when the decoded value is an object or an array and
   * json_last_error() reports JSON_ERROR_NONE.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Anything that is neither object nor array is rejected right away.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    return json_last_error() === JSON_ERROR_NONE;
  }
2991 5
2992
  /**
   * Check if the string is UTF-16.
   *
   * After remove_bom(), the input is only inspected further when is_binary()
   * reports true. It is then converted UTF-16LE -> UTF-8 -> UTF-16LE -> UTF-8
   * (and likewise for UTF-16BE); a byte order only scores when both UTF-8
   * results are identical. The byte order with the higher score wins; equal
   * scores mean "not UTF-16".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-16,<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    $scores = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    // little endian is evaluated first, big endian second
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Score: number of keys of count_chars($roundTrip) that are found
      // (strictly) among the values of count_chars($str).
      // NOTE(review): matching keys against values looks unintended - confirm against count_chars().
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
        if (in_array($roundTripChar, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-16LE'] === $scores['UTF-16BE']) {
      return false;
    }

    return ($scores['UTF-16LE'] > $scores['UTF-16BE']) ? 1 : 2;
  }
3051 2
3052
  /**
   * Check if the string is UTF-32.
   *
   * After remove_bom(), the input is only inspected further when is_binary()
   * reports true. It is then converted UTF-32LE -> UTF-8 -> UTF-32LE -> UTF-8
   * (and likewise for UTF-32BE); a byte order only scores when both UTF-8
   * results are identical. The byte order with the higher score wins; equal
   * scores mean "not UTF-32".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it is not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    $scores = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    // little endian is evaluated first, big endian second
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      // Score: number of keys of count_chars($roundTrip) that are found
      // (strictly) among the values of count_chars($str).
      // NOTE(review): matching keys against values looks unintended - confirm against count_chars().
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $roundTripChar => $unused) {
        if (in_array($roundTripChar, $strChars, true) === true) {
          $scores[$byteOrder]++;
        }
      }
    }

    if ($scores['UTF-32LE'] === $scores['UTF-32BE']) {
      return false;
    }

    return ($scores['UTF-32LE'] > $scores['UTF-32BE']) ? 1 : 2;
  }
3111
3112
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * The input is cast to string; an empty string is valid. Rejected are:
   * stray continuation octets, invalid lead octets, over-long (non-shortest)
   * forms, 5- and 6-octet sequences, surrogates (U+D800-U+DFFF), code points
   * above U+10FFFF and sequences that are cut off - in the middle of the
   * string as well as at its very end.
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut relies on the /u modifier but is taken when
    // pcre_utf8_support() does NOT report true - confirm the condition is intended.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current character
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets announced by the current lead octet

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length, independent of a possible mbstring function overload
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or the
        // lead octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // When mState is non-zero, we expect a continuation of the multi-octet
      // sequence; anything else means the sequence is incomplete.
      if (0x80 !== (0xC0 & $in)) {
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence, mUcs4 now holds the final code
        // point: check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // re-initialize the state for the next character
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3263
3264
  /**
3265
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3266
   * Decodes a JSON string
3267
   *
3268
   * @link http://php.net/manual/en/function.json-decode.php
3269 2
   *
3270
   * @param string $json    <p>
3271 2
   *                        The <i>json</i> string being decoded.
3272
   *                        </p>
3273 2
   *                        <p>
3274
   *                        This function only works with UTF-8 encoded strings.
3275
   *                        </p>
3276 2
   *                        <p>PHP implements a superset of
3277
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3278
   *                        only supports these values when they are nested inside an array or an object.
3279 2
   *                        </p>
3280
   * @param bool   $assoc   [optional] <p>
3281
   *                        When <b>TRUE</b>, returned objects will be converted into
3282
   *                        associative arrays.
3283
   *                        </p>
3284
   * @param int    $depth   [optional] <p>
3285
   *                        User specified recursion depth.
3286
   *                        </p>
3287
   * @param int    $options [optional] <p>
3288
   *                        Bitmask of JSON decode options. Currently only
3289
   *                        <b>JSON_BIGINT_AS_STRING</b>
3290
   *                        is supported (default is to cast large integers as floats)
3291
   *                        </p>
3292
   *
3293
   * @return mixed the value encoded in <i>json</i> in appropriate
3294
   * PHP type. Values true, false and
3295
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3296
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3297
   * <i>json</i> cannot be decoded or if the encoded
3298
   * data is deeper than the recursion limit.
3299
   */
3300 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is run through filter() and cast to string before decoding.
    $filtered = (string)self::filter($json);

    // "$options" is only handed to the native json_decode() when
    // Bootup::is_php('5.4') reports true; otherwise it is left out.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3312
3313
  /**
3314
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3315
   * Returns the JSON representation of a value.
3316
   *
3317
   * @link http://php.net/manual/en/function.json-encode.php
3318 2
   *
3319
   * @param mixed $value   <p>
3320 2
   *                       The <i>value</i> being encoded. Can be any type except
3321
   *                       a resource.
3322 2
   *                       </p>
3323
   *                       <p>
3324
   *                       All string data must be UTF-8 encoded.
3325 2
   *                       </p>
3326
   *                       <p>PHP implements a superset of
3327
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3328 2
   *                       only supports these values when they are nested inside an array or an object.
3329
   *                       </p>
3330
   * @param int   $options [optional] <p>
3331
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3332
   *                       <b>JSON_HEX_TAG</b>,
3333
   *                       <b>JSON_HEX_AMP</b>,
3334
   *                       <b>JSON_HEX_APOS</b>,
3335
   *                       <b>JSON_NUMERIC_CHECK</b>,
3336
   *                       <b>JSON_PRETTY_PRINT</b>,
3337
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3338
   *                       <b>JSON_FORCE_OBJECT</b>,
3339
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3340 7
   *                       constants is described on
3341
   *                       the JSON constants page.
3342 7
   *                       </p>
3343 7
   * @param int   $depth   [optional] <p>
3344
   *                       Set the maximum depth. Must be greater than zero.
3345
   *                       </p>
3346
   *
3347 7
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3348 7
   */
3349 7 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // The value is run through filter() before encoding.
    $filtered = self::filter($value);

    // "$depth" is only handed to the native json_encode() when
    // Bootup::is_php('5.5') reports true; otherwise it is left out.
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3361
3362
  /**
3363
   * Makes string's first char lowercase.
3364
   *
3365
   * @param string  $str       <p>The input string</p>
3366
   * @param string  $encoding  [optional] <p>Set the charset.</p>
3367 1
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3368
   *
3369 1
   * @return string <p>The resulting string</p>
3370
   */
3371
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Everything after the first character; self::substr() may report
    // failure with false, which is treated as an empty remainder.
    $remainder = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($remainder === false) {
      $remainder = '';
    }

    // Only the first character is lowercased.
    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
3386 1
3387
  /**
3388
   * alias for "UTF8::lcfirst()"
3389 1
   *
3390 1
   * @see UTF8::lcfirst()
3391
   *
3392 1
   * @param string  $word
3393 1
   * @param string  $encoding
3394 1
   * @param boolean $cleanUtf8
3395 1
   *
3396
   * @return string
3397
   */
3398 1
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Pure alias: forwards all arguments unchanged to lcfirst().
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3402
3403
  /**
3404
   * Lowercase for all words in the string.
3405
   *
3406 1
   * @param string   $str        <p>The input string.</p>
3407
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
3408
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
3409 1
   * @param string   $encoding   [optional] <p>Set the charset.</p>
3410 1
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
3411 1
   *
3412 1
   * @return string
3413 1
   */
3414 1
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Test for the empty string explicitly: a plain truthiness check would
    // also reject the valid input "0" (which is falsy in PHP).
    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    // Exceptions only need to be consulted when at least one was given.
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {
      // Skip empty chunks only; a chunk such as "0" must stay in the result.
      if ((string)$word === '') {
        continue;
      }

      // Lowercase the first char unless the word is listed as an exception.
      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    // The chunks are glued back together without any separator.
    return implode('', $newWords);
  }
3452
3453 1
  /**
3454
   * Strip whitespace or other characters from beginning of a UTF-8 string.
3455 1
   *
3456 1
   * @param string $str   <p>The string to be trimmed</p>
3457 1
   * @param string $chars <p>Optional characters to be stripped</p>
3458
   *
3459 1
   * @return string <p>The string with unwanted characters stripped from the left.</p>
3460
   */
3461 View Code Duplication
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // No character list given (INF sentinel or an empty value): strip leading
    // Unicode separators (\pZ) and "other" characters such as controls (\pC).
    // The string "0" is falsy in PHP but is a legitimate character list, so it
    // must not be mistaken for "not given".
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    // Otherwise strip any run of the given characters from the left.
    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3476
3477 1
  /**
3478
   * Returns the UTF-8 character with the maximum code point in the given data.
3479
   *
3480
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
3481
   *
3482
   * @return string <p>The character with the highest code point than others.</p>
3483
   */
3484 View Code Duplication
  public static function max($arg)
  {
    // An array of strings is treated as one concatenated string.
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native max() cannot handle an empty list (E_WARNING before PHP 8,
    // ValueError since PHP 8), so input without any code point yields ''.
    if (!is_array($codePoints) || count($codePoints) === 0) {
      return '';
    }

    // Convert the highest code point back into its UTF-8 character.
    return self::chr(max($codePoints));
  }
3492
3493 15
  /**
3494
   * Calculates and returns the maximum number of bytes taken by any
3495
   * UTF-8 encoded character in the given string.
3496
   *
3497
   * @param string $str <p>The original Unicode string.</p>
3498
   *
3499
   * @return int <p>Max byte lengths of the given chars.</p>
3500
   */
3501
  public static function max_chr_width($str)
  {
    // List of per-character sizes as reported by self::chr_size_list().
    $byteLengths = self::chr_size_list($str);

    // Nothing to measure: report a width of zero.
    if (count($byteLengths) === 0) {
      return 0;
    }

    return (int)max($byteLengths);
  }
3510
3511
  /**
3512
   * Checks whether mbstring is available on the server.
3513
   *
3514
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
3515
   */
3516
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    // Side effect: whenever mbstring is available, its internal encoding is
    // switched to UTF-8.
    \mb_internal_encoding('UTF-8');

    return true;
  }
3526
3527
  /**
3528
   * Returns the UTF-8 character with the minimum code point in the given data.
3529
   *
3530
   * @param mixed $arg <strong>A UTF-8 encoded string or an array of such strings.</strong>
3531
   *
3532
   * @return string <p>The character with the lowest code point than others.</p>
3533
   */
3534 View Code Duplication
  public static function min($arg)
  {
    // An array of strings is treated as one concatenated string.
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // The native min() cannot handle an empty list (E_WARNING before PHP 8,
    // ValueError since PHP 8), so input without any code point yields ''.
    if (!is_array($codePoints) || count($codePoints) === 0) {
      return '';
    }

    // Convert the lowest code point back into its UTF-8 character.
    return self::chr(min($codePoints));
  }
3542 3
3543
  /**
3544
   * alias for "UTF8::normalize_encoding()"
3545 76
   *
3546
   * @see UTF8::normalize_encoding()
3547
   *
3548
   * @param string $encoding
3549 76
   * @param mixed  $fallback
3550 6
   *
3551
   * @return string
3552
   *
3553 75
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
3554 74
   */
3555
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    // Deprecated camelCase alias: forwards to normalize_encoding().
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3559 4
3560
  /**
3561
   * Normalize the encoding-"name" input.
3562 4
   *
3563 4
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
3564 4
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
3565 4
   *
3566 4
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
3567 4
   */
3568 4
  public static function normalize_encoding($encoding, $fallback = false)
  {
    // Per-request memo: original (trimmed) input name => normalized name.
    static $NORMALIZED_NAME_CACHE = array();

    $encoding = trim((string)$encoding);

    // Empty input: use the caller's fallback if it is truthy, else "UTF-8".
    if (!$encoding) {
      return $fallback ? $fallback : 'UTF-8';
    }

    // Fast paths: already canonical UTF-8, or a name listed in self::$ICONV_ENCODING.
    if ($encoding === 'UTF-8' || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($NORMALIZED_NAME_CACHE[$encoding])) {
      return $NORMALIZED_NAME_CACHE[$encoding];
    }

    // Upper-case the name, then build the lookup key by dropping everything
    // except A-Z, 0-9 and whitespace (e.g. "iso-8859-1" => "ISO88591").
    $normalized = strtoupper($encoding);
    $lookupKey = preg_replace('/[^A-Z0-9\s]/', '', $normalized);

    $equivalences = array(
        // ISO-8859 family
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // Raw byte "encodings"
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // Known alias: swap in the canonical name. Unknown names are returned in
    // their upper-cased form.
    if (isset($equivalences[$lookupKey])) {
      $normalized = $equivalences[$lookupKey];
    }

    // Remember the result under the name as it was passed in (after trimming).
    $NORMALIZED_NAME_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3673 1
3674
  /**
3675
   * Normalize some MS Word special characters.
3676 11
   *
3677
   * @param string $str <p>The string to be normalized.</p>
3678
   *
3679
   * @return string
3680
   */
3681 View Code Duplication
  public static function normalize_msword($str)
  {
    // Search / replace lists derived once from self::$UTF8_MSWORD and reused
    // on every later call.
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$UTF8_MSWORD);
      $REPLACE_CACHE = array_values(self::$UTF8_MSWORD);
    }

    // Each key of the table is replaced by its mapped value.
    return str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);
  }
3699
3700
  /**
3701
   * Normalize the whitespace.
3702
   *
3703
   * @param string $str                     <p>The string to be normalized.</p>
3704
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
3705
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
3706
   *                                        bidirectional text chars.</p>
3707
   *
3708
   * @return string
3709
   */
3710
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    // Slot 0: full whitespace table, slot 1: table without NO-BREAK SPACE.
    static $WHITESPACE_CACHE = array();
    static $BIDI_UNICODE_CONTROLS_CACHE = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // The cache slot must be derived from the very same strict test that
    // decides whether NO-BREAK SPACE is removed from the table. Using
    // "(int)$keepNonBreakingSpace" as key would let a truthy non-bool (e.g. 1)
    // store the *full* table in slot 1 and break later calls that pass true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {
      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        unset($table['NO-BREAK SPACE']);
      }

      // Only the values (the whitespace characters themselves) are needed.
      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    // Unless asked to keep them, drop the bidirectional control characters.
    if ($keepBidiUnicodeControls === false) {
      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    // Every remaining whitespace variant becomes a plain space.
    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3745
3746
  /**
3747
   * Strip all whitespace characters. This includes tabs and newline
3748 23
   * characters, as well as multibyte whitespace such as the thin space
3749
   * and ideographic space.
3750
   *
3751
   * @param string $str
3752
   *
3753
   * @return string
3754
   */
3755
  public static function strip_whitespace($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Remove every run of characters matched by the POSIX "space" class in
    // Unicode mode; the cast turns a null result from preg_replace() into ''.
    $stripped = preg_replace('/[[:space:]]+/u', '', $str);

    return (string)$stripped;
  }
3765
3766 10
  /**
3767 1
   * Format a number with grouped thousands.
3768
   *
3769
   * @param float  $number
3770 10
   * @param int    $decimals
3771 4
   * @param string $dec_point
3772
   * @param string $thousands_sep
3773
   *
3774 10
   * @return string
3775 6
   *
3776
   * @deprecated <p>This has nothing to do with UTF-8.</p>
3777
   */
3778 10
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // Before PHP 5.4 the native number_format() used only the first byte of
    // each separator. On those runtimes, format with the single-byte defaults
    // first and swap the separators in afterwards whenever at least one of
    // them is longer than one byte.
    if (
        (isset($thousands_sep[1]) || isset($dec_point[1]))
        &&
        Bootup::is_php('5.4') !== true
    ) {
      // strtr() with a map replaces both separators in a single pass, so a
      // separator that itself contains "." or "," is never replaced again.
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    // PHP >= 5.4 (or single-byte separators): the native function is correct.
    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3804 1
3805
  /**
3806
   * Calculates Unicode code point of the given UTF-8 encoded character.
3807 1
   *
3808
   * INFO: opposite to UTF8::chr()
3809
   *
3810
   * @param string      $chr      <p>The character of which to calculate code point.</p>
3811
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
3812
   *
3813
   * @return int <p>
3814
   *             Unicode code point of the given character,<br>
3815 58
   *             0 on invalid UTF-8 byte sequence.
3816
   *             </p>
3817
   */
3818
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // Memo: (UTF-8) character => code point.
    static $CHAR_CACHE = array();

    $encoding = (string)$encoding;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // Still not UTF-8 after normalizing the name: convert the input first.
      if ($encoding !== 'UTF-8') {
        $chr = \mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Prefer IntlChar when it is flagged as available; a falsy answer
    // falls through to the manual decoding below.
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        $CHAR_CACHE[$chr] = $intlCode;

        return $intlCode;
      }
    }

    // Manual decoding: look at (at most) the first four bytes, 1-indexed.
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $code = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $code && isset($bytes[4])) {
      // Lead byte >= 0xF0 with four bytes present: 4-byte sequence.
      $code = (($code - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $code && isset($bytes[3])) {
      // Lead byte >= 0xE0 with three bytes present: 3-byte sequence.
      $code = (($code - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $code && isset($bytes[2])) {
      // Lead byte >= 0xC0 with two bytes present: 2-byte sequence.
      $code = (($code - 0xC0) << 6) + $bytes[2] - 0x80;
    }
    // Otherwise the value of the first byte (or 0 for no bytes) is the result.

    $CHAR_CACHE[$chr] = $code;

    return $code;
  }
3871
3872
  /**
3873
   * Parses the string into an array (into the second parameter).
3874
   *
3875
   * WARNING: Unlike "parse_str()", this method does not (re-)place variables in the current scope,
3876
   *          if the second parameter is not set!
3877
   *
3878
   * @link http://php.net/manual/en/function.parse-str.php
3879
   *
3880
   * @param string  $str       <p>The input string.</p>
3881
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
3882
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3883
   *
3884
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string and we haven't any $result.</p>
3885
   */
3886
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    // Optionally run the input through self::clean() first.
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    // Success means: the parser did not report false AND something ended up
    // in the by-reference $result.
    return $parsed !== false && !empty($result);
  }
3900
3901 2
  /**
3902
   * Checks if \u modifier is available that enables Unicode support in PCRE.
3903
   *
3904 2
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
3905
   */
3906 2
  public static function pcre_utf8_support()
  {
    // Probe with an empty pattern that carries the "u" modifier; errors
    // raised by the probe are silenced on purpose.
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $probe = @preg_match('//u', '');

    return (bool)$probe;
  }
3911 2
3912 2
  /**
3913 2
   * Create an array containing a range of UTF-8 characters.
3914
   *
3915 2
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
3916
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
3917 2
   *
3918
   * @return array
3919
   */
3920
  public static function range($var1, $var2)
  {
    // Both boundaries are required (falsy values such as 0 or "" are rejected).
    if (!$var1 || !$var2) {
      return array();
    }

    // Resolve each boundary to a code point, start first, then end.
    $bounds = array();
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        // Decimal digits only: already a code point.
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        // Hexadecimal digits: converted via self::hex_to_int().
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        // Anything else is treated as a character.
        $codePoint = self::ord($boundary);
      }

      // A boundary that resolves to nothing usable ends the whole operation.
      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    // Turn every code point of the numeric range back into a character.
    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($bounds[0], $bounds[1])
    );
  }
3958 5
3959 40
  /**
3960
   * Multi decode html entity & fix urlencoded-win1252-chars.
3961 40
   *
3962
   * e.g:
3963
   * 'test+test'                     => 'test+test'
3964
   * 'D&#252;sseldorf'               => 'Düsseldorf'
3965
   * 'D%FCsseldorf'                  => 'Düsseldorf'
3966
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
3967
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
3968
   * 'Düsseldorf'                   => 'Düsseldorf'
3969
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
3970
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
3971
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
3972 1
   *
3973
   * @param string $str          <p>The input string.</p>
3974 1
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
3975 1
   *
3976 1
   * @return string
3977
   */
3978 1 View Code Duplication
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" escapes are rewritten into hexadecimal HTML entities so that
    // the entity decoding below can handle them.
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    // ENT_HTML5 is only referenced on PHP >= 5.4.
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // Decode repeatedly until a pass changes nothing (or only once when
    // $multi_decode is not true).
    do {
      $previous = $str;

      $entityDecoded = self::html_entity_decode(self::to_utf8($str), $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));
    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
4009 57
4010 57
  /**
4011 57
   * alias for "UTF8::remove_bom()"
4012
   *
4013 57
   * @see UTF8::remove_bom()
4014
   *
4015
   * @param string $str
4016 57
   *
4017 57
   * @return string
4018
   *
4019 57
   * @deprecated <p>use "UTF8::remove_bom()"</p>
4020
   */
4021
  public static function removeBOM($str)
  {
    // Deprecated camelCase alias: forwards to remove_bom().
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4025
4026
  /**
4027
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
4028
   *
4029
   * @param string $str <p>The input string.</p>
4030
   *
4031 57
   * @return string <p>String without UTF-BOM</p>
4032
   */
4033 57
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // self::$BOM maps each BOM byte string to its length in bytes.
    foreach (self::$BOM as $bomString => $bomByteLength) {
      // Only a BOM at byte offset 0 counts.
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      // Cut the BOM off byte-wise; a failed substr() leaves an empty string.
      $rest = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($rest === false) ? '' : (string)$rest;
    }

    return $str;
  }
4053 57
4054 57
  /**
4055
   * Removes duplicate occurrences of a string in another string.
4056 57
   *
4057
   * @param string          $str  <p>The base string.</p>
4058 57
   * @param string|string[] $what <p>String to search for in the base string.</p>
4059 57
   *
4060 57
   * @return string <p>The result string with removed duplicates.</p>
4061
   */
4062 57
  public static function remove_duplicates($str, $what = ' ')
  {
    // A single needle is handled like a one-element list.
    if (is_string($what) === true) {
      $what = array($what);
    }

    // Anything that is neither a string nor an array leaves $str untouched.
    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Collapse every run of the needle into one occurrence. The
        // replacement is the captured group ("$1") rather than the raw
        // needle: a needle containing "$0", "\1" etc. would otherwise be
        // interpreted as a back-reference by preg_replace().
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4077 23
4078
  /**
4079 23
   * Remove invisible characters from a string.
4080
   *
4081 23
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
4082 5
   *
4083
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
4084
   *
4085
   * @param string $str
4086 19
   * @param bool   $url_encoded
4087 3
   * @param string $replacement
4088
   *
4089
   * @return string
4090 18
   */
4091
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // The "i" modifier matters: percent-escapes may use upper-case hex
      // digits (e.g. "%0B", "%1F") and must be caught as well.
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // Repeat until a pass replaces nothing: removing one sequence can make a
    // new one appear out of the surrounding characters.
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4111
4112 19
  /**
4113
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
4114
   *
4115 19
   * @param string $str                <p>The input string</p>
4116 18
   * @param string $replacementChar    <p>The replacement character.</p>
4117
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
4118 18
   *
4119 18
   * @return string
4120 18
   */
4121 2
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // An empty replacement is passed to mbstring as the keyword "none".
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // Temporarily switch the substitute character, re-encode UTF-8 => UTF-8
      // and restore the previous setting afterwards.
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    // Finally replace the replacement character itself, given once as a byte
    // escape and once as a literal.
    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($search, $replace, $str);
  }
4157
4158
  /**
4159
   * Strip whitespace or other characters from end of a UTF-8 string.
4160
   *
4161
   * @param string $str   <p>The string to be trimmed.</p>
4162
   * @param string $chars <p>Optional characters to be stripped.</p>
4163
   *
4164
   * @return string <p>The string with unwanted characters stripped from the right.</p>
4165 1
   */
4166 View Code Duplication
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // No character list given (INF sentinel or an empty value): strip trailing
    // Unicode separators (\pZ) and "other" characters such as controls (\pC).
    // The string "0" is falsy in PHP but is a legitimate character list, so it
    // must not be mistaken for "not given".
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    // Otherwise strip any run of the given characters from the right.
    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4181 1
4182
  /**
4183
   * rxClass
4184
   *
4185 1
   * @param string $s
4186
   * @param string $class
4187
   *
4188
   * @return string
4189
   */
4190
  private static function rxClass($s, $class = '')
  {
    // Memo: input chars + initial class => finished pattern fragment.
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // Index 0 collects the content of the bracket expression; any further
    // entries become separate alternatives.
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // A hyphen goes to the front of the class.
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        // At most two bytes long: escape regex meta characters.
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        // Three or more bytes but a single character: append as-is.
        $parts[0] .= $char;
        continue;
      }

      // Anything else is kept as an alternative of its own.
      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    // A lone class is returned directly; several parts are joined into a
    // non-capturing alternation.
    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $pattern;

    return $pattern;
  }
4230
4231
  /**
   * WARNING: Prints the detected native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of the internal support-list is echoed, followed by a line break + "<br>".
   */
  public static function showSupport()
  {
    $alreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);

    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $supportInfo) {
      echo $supportInfo . "\n<br>";
    }
  }
4244
4245
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII stays untouched, but only if the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // "UTF-8" is already the normalized name, everything else must be normalized first
    $normalizedEncoding = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    return '&#' . self::ord($char, $normalizedEncoding) . ';';
  }
4276
4277
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE-UTF-8 support the string is split via regex, otherwise the bytes are
   * walked by hand and grouped into 1-4 byte sequences (invalid sequences are dropped).
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // init
    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "." + "s" + "u" => exactly one code point per match (line breaks included)
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the UTF-8 byte sequences by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // we need the number of bytes here, also if "strlen()" is overloaded by mbstring
      $byteCount = (self::$SUPPORT['mbstring_func_overload'] === true) ? \mb_strlen($str, '8BIT') : strlen($str);

      $pos = 0;
      while ($pos < $byteCount) {
        $lead = $str[$pos];

        if (($lead & "\x80") === "\x00") {

          // 0xxxxxxx => single byte
          $chars[] = $lead;

        } elseif (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {

          // 110xxxxx => two bytes
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];

            $pos++;
          }

        } elseif (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {

          // 1110xxxx => three bytes
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];

            $pos += 2;
          }

        } elseif (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {

          // 11110xxx => four bytes
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];

            $pos += 3;
          }

        }

        $pos++;
      }
    }

    if ($length > 1) {
      // glue "$length" characters together per array element
      $chunks = array();
      foreach (array_chunk($chars, $length) as $chunk) {
        $chunks[] = implode('', $chunk);
      }

      return $chunks;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4400
4401
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run from cheap/specific to expensive/generic:
   * binary (UTF-16 / UTF-32) -> ASCII -> UTF-8 -> "\mb_detect_encoding()" -> "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // run every detection only once: 1 => little-endian, 2 => big-endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // an encoding "fits" if converting the string to the very same encoding doesn't change it
    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4501 24
4502
  /**
   * Check if the string ends with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // cut off as many characters from the end as the needle has
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $needle === $tail;
  }
4530
4531
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // cut off as many characters from the end as the needle has
    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // "UTF8::substr()" can fail with "false", which must not be compared as a string
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4554 1
4555 1
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * INFO: the replacement is done via "preg_replace()", the search terms are quoted and the
   *       replacement values are escaped, so both are always used as literal text.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s), used literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one case-insensitive UTF-8 pattern per search term
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ($needle === '') {
        // an empty needle must never match: this pattern can't match anything
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // "$1", "\1", ... would be interpreted as back-references by "preg_replace()",
    // but "str_ireplace()" has to insert the replacement as it is
    $escape = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      $replacement = array();
      foreach ($replace as $key => $value) {
        $replacement[$key] = strtr((string)$value, $escape);
      }
    } else {
      $replacement = strtr((string)$replace, $escape);
    }

    return preg_replace($patterns, $replacement, $subject, -1, $count);
  }
4598
4599 2
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle must be found at the very first position
    return self::stripos($haystack, $needle) === 0;
  }
4622 2
4623 2
  /**
   * Limit the number of characters in a string, without cutting the last word in pieces.
   *
   * A string which is not longer than "$length" is returned as it is, otherwise the
   * shortened string + "$strAddOn" is returned.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max. number of characters (without "$strAddOn").</p>
   * @param string $strAddOn <p>Will be appended if the string was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit is exactly at the end of a word -> just drop the space
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $shortened = (string)self::substr($str, 0, $length);

    // remove the last (most likely incomplete) word
    $words = explode(' ', $shortened);
    $withoutLastWord = implode(' ', array_slice($words, 0, -1));

    if ($withoutLastWord !== '') {
      return $withoutLastWord . $strAddOn;
    }

    // there is only one (too long) word, so we need to cut it
    return (string)self::substr($shortened, 0, $length - 1) . $strAddOn;
  }
4663
4664
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if "$pad_length" is not an integer, is not greater
   * than zero, is smaller than the length of the input or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // nothing to pad with (this also prevents a division by zero)
    if ($ps_length < 1) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    // number of characters we need on each side
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $leftLength = $diff;
        $rightLength = 0;
        break;

      case STR_PAD_BOTH:
        // like the native "str_pad()": the extra character of an odd difference goes to the right
        $leftLength = (int)floor($diff / 2);
        $rightLength = $diff - $leftLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $leftLength = 0;
        $rightLength = $diff;
    }

    // repeat the pad-string often enough and cut it down to the exact length
    $pre = (string)self::substr(str_repeat($pad_string, (int)ceil($leftLength / $ps_length)), 0, $leftLength);
    $post = (string)self::substr(str_repeat($pad_string, (int)ceil($rightLength / $ps_length)), 0, $rightLength);

    return $pre . $str . $post;
  }
4718
4719 1
  /**
   * Repeat a string.
   *
   * INFO: the input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4743 1
4744 1
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // "$count" is handed over by reference, so the caller gets the number of replacements
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4777
4778 21
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * The subject is returned unchanged if the "$search"-term was not found.
   *
   * @param string $search
   * @param string $replace
   * @param string $subject
   *
   * @return string
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstPos = self::strpos($subject, $search);

    if ($firstPos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
4797
4798
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // shuffle whole characters, not single bytes
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4813
4814 2
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes the duplicates: a value can only be used once as array-key
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4839
4840
  /**
   * Split a string into an array.
   *
   * The string is split into grapheme clusters (see "GRAPHEME_CLUSTER_RX"), where
   * "$len" clusters are put together into one array element.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of clusters per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      // let the native function deal with the invalid length
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue "$len" clusters together per array element
    $arrayOutput = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $arrayOutput[] = implode('', $group);
    }

    return $arrayOutput;
  }
4884
4885
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle never matches.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle must be found at the very first position
    return self::strpos($haystack, $needle) === 0;
  }
4908
4909
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number, so leading zero bits are
   * not part of the result, e.g. "a" (0x61) => "1100001".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1", "0" for an empty input.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '0';
    }

    // Convert byte by byte: "base_convert()" on the whole hex-string loses precision
    // for large numbers, so longer strings would get a wrong result.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // same output format as a number conversion: no leading zeros, but at least "0"
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4924
4925
  /**
   * Convert a string into an array of words.
   *
   * The string is split with "PREG_SPLIT_DELIM_CAPTURE", so the result contains the words
   * and also the text between the words, as long as it's not filtered via the parameters.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>Remove values which are not longer than this number of characters.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return ($removeEmptyValues === true) ? array() : array('');
    }

    // letters + the additional chars are the "word"-characters
    $charList = self::rxClass($charList, '\pL');

    $return = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // nothing to filter
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $return;
    }

    $filtered = array();
    foreach ($return as $part) {
      $skip = (
                  $removeShortValues !== null
                  &&
                  self::strlen($part) <= $removeShortValues
              )
              ||
              (
                  $removeEmptyValues === true
                  &&
                  trim($part) === ''
              );

      if ($skip === false) {
        $filtered[] = $part;
      }
    }

    return $filtered;
  }
4986
4987
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // only an alias: all arguments are passed on as they are
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
5002
5003
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are on the odd positions, the text between the words is on the even positions
    $strParts = self::str_to_words($str, $charlist);
    $partCount = count($strParts);

    if ($format === 1) {
      $words = array();
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[] = $strParts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();

      // the first word starts after the leading non-word part
      $offset = self::strlen($strParts[0]);
      for ($index = 1; $index < $partCount; $index += 2) {
        $words[$offset] = $strParts[$index];
        $offset += self::strlen($strParts[$index]) + self::strlen($strParts[$index + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
5046 13
5047
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    // compare the case-folded versions of both strings
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5065
5066
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only an alias: all arguments are passed on as they are
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5083 2
5084 2
  /**
   * Case-sensitive string comparison.
   *
   * INFO: strings which are not identical are compared in their normalized (NFD) form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings don't need to be normalized
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5104 3
5105 3
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters which end the segment.</p>
   * @param int    $offset   <p>Start position, passed on to "UTF8::substr()".</p>
   * @param int    $length   <p>Max. length, passed on to "UTF8::substr()".</p>
   *
   * @return int|null <p>
   *                  The length of the segment, or <strong>null</strong> for an empty "$charList",
   *                  an empty string or an invalid offset / length.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList .= '';
    if ($charList === '') {
      return null;
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazy match: everything before the first character from the char-list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no character from the char-list found: the whole string is the segment
    return self::strlen($str);
  }
5141 2
5142 1
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only an alias: all arguments are passed on as they are
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5159
5160
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    // every code point is converted via "UTF8::chr()"
    $chrCallback = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    $chars = array_map($chrCallback, $array);

    return implode('', $chars);
  }
5182
5183 9
  /**
   * Tell whether the string begins with one of the known "BOM"
   * (Byte Order Mark) byte sequences.
   *
   * The candidates are the keys of self::$BOM; the values of that map
   * are not needed here.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    foreach (array_keys(self::$BOM) as $bom) {
      // position 0 (and only 0) means "starts with"
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5200 9
5201 9
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped; passed straight to the
   *                                native strip_tags(). HTML comments and PHP tags are always
   *                                stripped, this can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string (an empty string for empty input).</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    // only an explicit boolean "true" triggers the clean-up
    $subject = $cleanUtf8 === true ? self::clean($input) : $input;

    return strip_tags($subject, $allowable_tags);
  }
5235 1
5236
  /**
   * Case-insensitive search for the first occurrence of a string within another.
   *
   * Delegates to grapheme_stripos() when the encoding is UTF-8, "intl" is flagged
   * as supported and the Bootup::is_php('5.4') check passes; otherwise to mb_stripos().
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (also for an empty haystack or needle).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5296
5297
  /**
   * Returns all of haystack starting from and including the first (case-insensitive)
   * occurrence of needle to the end.
   *
   * Back-ends are tried in this order: mb_stristr(), grapheme_stristr() (UTF-8 only),
   * the native stristr() for pure ASCII input and finally a regex based fallback.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also for an empty haystack or needle).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the clean-up removed everything.
    // Compare strictly: a loose check would treat the valid needle "0" as empty.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      // forward $before_needle, otherwise the flag is silently ignored for ASCII input
      return \stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip the captured prefix, keep the match and everything after it
    return self::substr($haystack, self::strlen($match[1]));
  }
5378
5379
  /**
   * Get the string length, not the byte-length!
   *
   * "ASCII", "CP850" and "8BIT" are measured byte-wise. For everything else the
   * back-ends are tried in this order: mb_strlen(), iconv_strlen(), grapheme_strlen()
   * (UTF-8 only), the native strlen() for pure ASCII input and a regex based count.
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // single-byte encodings: the length is the number of bytes
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $nativeStrlenIsByteSafe = $encoding === 'CP850'
                                  &&
                                  self::$SUPPORT['mbstring_func_overload'] === false;

        return $nativeStrlenIsByteSafe ? strlen($str) : \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    // covers both the UTF-8 and the non UTF-8 case when only iconv is available
    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);

    // nothing matched: fallback to "mb_"-function via polyfill
    return $charCount !== 0 ? $charCount : \mb_strlen($str, $encoding);
  }
5486 1
5487
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strnatcmp().
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5503
5504
  /**
   * String comparisons using a "natural order" algorithm
   *
   * Identical strings short-circuit to 0; otherwise both strings are passed through
   * UTF8::strtonatfold() and compared with the native strnatcmp().
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5522
5523
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared
   * with UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5540
5541
  /**
   * String comparison of the first n characters.
   *
   * Both strings are cut with UTF8::substr() and the results are compared
   * with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    // a failed substr() is cast to a string before comparing
    return self::strcmp((string)$head1, (string)$head2);
  }
5561
5562
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also for an empty haystack or char list).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // rxClass() presumably turns the list into a regex character class -- TODO confirm
    $found = preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m);
    if (!$found) {
      return false;
    }

    // byte-wise on purpose: cut the haystack at the first occurrence of the matched character
    $bytePos = strpos($haystack, $m[0]);

    return substr($haystack, $bytePos);
  }
5587 17
5588
  /**
   * Find position of first occurrence of string in a string.
   *
   * Back-ends are tried in this order: the native strpos() for "CP850", iconv_strpos()
   * for non UTF-8 encodings without mbstring, mb_strpos(), grapheme_strpos() (UTF-8 only),
   * iconv_strpos(), the native strpos() for pure ASCII input and a vanilla php fallback.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found it returns false (also for an empty haystack or needle).
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: convert a code point into its character.
    // This must run before the string cast below, afterwards the check could never match.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither extension is available (same rule as in UTF8::strlen())
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Turn a negative offset into the matching position counted from the start,
    // so that the result below stays relative to the whole haystack.
    if ($offset < 0) {
      $offset += self::strlen($haystack);
      if ($offset < 0) {
        $offset = 0;
      }
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // byte position inside the remaining part ...
    $pos = strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted by the skipped characters
    return $offset + self::strlen(substr($haystackTmp, 0, $pos));
  }
5733
5734
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * Thin wrapper around mb_strrchr() with optional encoding normalisation
   * and UTF-8 clean-up of both strings.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non default encodings need to be normalized
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
5773 1
5774
  /**
   * Reverses characters order in the string.
   *
   * The string is cut into pieces with UTF8::split(), the pieces are reversed
   * and glued together again.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5791
5792 1
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * Thin wrapper around mb_strrichr() with optional encoding normalisation
   * and UTF-8 clean-up of both strings.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non default encodings need to be normalized
    $charset = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5830
5831
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * Back-ends are tried in this order: mb_strripos(), grapheme_strripos() (UTF-8 only)
   * and finally UTF8::strrpos() on the upper-cased haystack and needle.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found, it returns false (also for an empty haystack or needle).
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is handled before everything is cast to a string
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    $mustClean = $cleanUtf8 === true || $encoding === true;
    if ($mustClean) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased versions case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5911
5912
  /**
   * Find the character position of the last occurrence of a string in a string.
   *
   * Dispatch order: "mb_strrpos()" when mbstring is available, then "grapheme_strrpos()" for UTF-8 input
   * when intl is available on PHP >= 5.4, and finally a byte-level fallback that converts the byte
   * position back into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int
   *                              (a non-negative int is converted via UTF8::chr()).</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of
   *                              characters into the string. Negative values will stop searching at an
   *                              arbitrary point prior to the end of the string.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is interpreted as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-checks on $encoding are only a fallback for old versions,
    //       where this argument position carried the "cleanUtf8" flag
    $encodingIsBool = ($encoding === true || $encoding === false);

    if ($cleanUtf8 === true || $encoding === true) {
      // "\mb_strrpos()" and "\iconv_strrpos()" are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding === 'UTF-8' || $encodingIsBool) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_strrpos()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: narrow the haystack according to the offset first

    $slice = null;
    if ($offset > 0) {
      $slice = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $slice = self::substr($haystack, 0, $offset);
      // the slice still starts at character 0, so nothing has to be added to the result
      $offset = 0;
    }

    if ($slice !== null) {
      $haystack = ($slice === false) ? '' : (string)$slice;
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // translate the byte position into a character position
    $prefix = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($prefix);
  }
6014
6015
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained
   * within a given mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars.</p>
   * @param int    $offset [optional] <p>Start position, passed on to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the examined segment, passed on to UTF8::substr().</p>
   *
   * @return int <p>Number of leading characters matching the mask; 0 if the string or the mask is empty.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // restrict the input to the requested window first
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    $str = (string)$str;

    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
6043
6044
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * Dispatch order: "mb_strstr()" when mbstring is available, then "grapheme_strstr()" for UTF-8 input
   * when intl is available on PHP >= 5.4, and finally a regex based fallback.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or haystack / needle is empty).</p>
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the positions
      // reported by the underlying search functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: "grapheme_strstr()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $head = $found[1];

    return $before_needle ? $head : self::substr($haystack, self::strlen($head));
  }
6116
6117
  /**
   * Unicode transformation for case-less matching.
   *
   * The string is run through the replacement table UTF8::$COMMON_CASE_FOLD, optionally through the
   * "caseFolding_full" data set, and is finally lower-cased via UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, additionally replace full case folding chars (default)<br>
   *                           <b>false</b>, use only the limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // lookup tables are built once and kept for the lifetime of the request
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      // index 0 is used as the search list, index 1 as the replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6168 3
6169
  /**
   * Make a string lowercase.
   *
   * Without a language the conversion is done by "mb_strtolower()". With a language, the intl
   * transliterator "<lang>-Lower" is used when intl is available on PHP >= 5.4 (falling back to
   * "Any-Lower" with a warning if that id is unknown); otherwise a warning is raised and
   * "mb_strtolower()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $canTransliterate = self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true;

      if ($canTransliterate) {
        $transliteratorId = $lang . '-Lower';

        if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

          // language specific rules are not available -> use the generic transliterator
          $transliteratorId = 'Any-Lower';
        }

        return transliterator_transliterate($transliteratorId, $str);
      }

      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtolower($str, $encoding);
  }
6226 1
6227
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is decomposed (NFD) and all non-spacing marks (\p{Mn}) are removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6239
6240 1
  /**
   * Make a string uppercase.
   *
   * Without a language the conversion is done by "mb_strtoupper()". With a language, the intl
   * transliterator "<lang>-Upper" is used when intl is available on PHP >= 5.4 (falling back to
   * "Any-Upper" with a warning if that id is unknown); otherwise a warning is raised and
   * "mb_strtoupper()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the case conversion
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // language specific rules are not available -> use the generic transliterator
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning has to name this method (it previously reported "UTF8::strtolower()")
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6296
6297
  /**
   * Translate characters or replace sub-strings.
   *
   * Modes:
   * - $from and $to given: every character (or array element) of $from is replaced by the element of
   *   $to at the same position; the longer list is truncated to the length of the shorter one.
   * - only $from given as array: it is used as a "search => replace" map, like the native strtr().
   * - only $from given as string: every occurrence of $from is removed from the string.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($from === $to) {
      return $str;
    }

    // INF is the "argument not given" marker
    if (INF !== $to) {
      // only strings are split into characters, arrays are already lists of
      // search / replace items and must not be passed to "str_split()"
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // nothing to translate; this also avoids calling "array_combine()" with empty arrays
      if ($countFrom === 0 || $countTo === 0) {
        return $str;
      }

      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6344 74
6345 3
  /**
   * Return the width of a string, as reported by "mb_strwidth()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    // iconv and mbstring are not tolerant to invalid encoding,
    // so invalid byte sequences are removed on request
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($input, $charset);
  }
6369 70
6370 34
  /**
   * Changes the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value is handled like
   *                     <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // everything that is not exactly CASE_LOWER results in upper-cased keys
    $lowerKeys = ($case === CASE_LOWER);

    $converted = array();
    foreach ($array as $originalKey => $item) {
      $newKey = $lowerKeys ? self::strtolower($originalKey) : self::strtoupper($originalKey);

      $converted[$newKey] = $item;
    }

    return $converted;
  }
6407
6408
  /**
   * Get part of a string.
   *
   * Dispatch order: native "substr()" for CP850 (when mbstring function overloading is off),
   * "mb_substr()" when mbstring is available, "grapheme_substr()" for UTF-8 input when intl is available
   * on PHP >= 5.4, "iconv_substr()" for non-negative lengths when iconv is available, native "substr()"
   * for pure ASCII input, and finally a split / slice / join fallback.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
   *                      characters long, <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // empty input or an explicit zero length: there is nothing to extract
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // no offset and no length: the whole string is requested
    if (!$offset && $length === null) {
      return $str;
    }

    $charCount = 0;
    if ($offset || $length === null) {
      $charCount = (int)self::strlen($str, $encoding);
    }

    // an offset behind the end of the string can't be satisfied
    if ($offset && $offset > $charCount) {
      return false;
    }

    // from here on $length is always an integer
    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding === 'UTF-8' || $encoding === true || $encoding === false) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // INFO: "grapheme_substr()" can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php:
    // split to array (this removes invalid characters), extract the relevant part
    // and join it to a string again
    $chars = self::split($str);

    return implode('', array_slice($chars, $offset, $length));
  }
6530 1
6531 1
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is reduced to that window and $str2 is cut to the
   * resulting character length of $str1 before both are compared.
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    if ($offset !== 0 || $length !== null) {
      $mainPart = self::substr($str1, $offset, $length);
      $str1 = ($mainPart === false) ? '' : (string)$mainPart;

      // compare against a part of $str2 that is as long as the extracted main part
      $otherPart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($otherPart === false) ? '' : (string)$otherPart;
    }

    return ($case_insensitivity === true) ? self::strcasecmp($str1, $str2) : self::strcmp($str1, $str2);
  }
6575
6576
  /**
   * Count the number of substring occurrences.
   *
   * The haystack is first reduced to the window described by $offset / $length; the counting is done
   * by "mb_substr_count()" when mbstring is available, otherwise by a regex based fallback.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring.
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences, or false if haystack / needle is empty or the
   *                   offset / length combination is rejected (PHP < 7.1 only).</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      // output from "substr_count()" have changed in PHP 7.1
      $rejectedBeforePhp71 = $length !== 0
                             &&
                             $offset !== 0
                             &&
                             $length + $offset <= 0
                             &&
                             Bootup::is_php('7.1') === false;

      if ($rejectedBeforePhp71) {
        return false;
      }

      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters are removed from both strings before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === false && $encoding !== 'UTF-8') {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // fallback via regex: every (non-overlapping) match is one occurrence
    $occurrences = array();
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return count($occurrences);
  }
6665 1
6666 1
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // an empty needle or a non-matching prefix leaves the haystack untouched
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6698
6699
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not
   *                end with the needle.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // an empty needle or a non-matching suffix leaves the haystack untouched
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $rest = self::substr($haystack, 0, $keepLength);

    return ($rest === false) ? '' : (string)$rest;
  }
6731 1
6732
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not
   *                start with the needle.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // an empty needle or a non-matching prefix leaves the haystack untouched
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6764
6765
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given (null), then it will default to the
   *                                          length of the string; i.e. end the replacing at the end of string. This
   *                                          also holds for every element when $str is an array. Of course, if
   *                                          length is zero then this function will have the effect of inserting
   *                                          replacement into string at the given start offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // nothing to do (and "array_fill()" rejects a zero count on old PHP versions)
      if ($num === 0) {
        return array();
      }

      // the replacement
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // the offset
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          // anything that is not a real, non-zero integer counts as offset 0
          if (!$valueTmp || (int)$valueTmp !== $valueTmp) {
            $valueTmp = 0;
          }
        }
        unset($valueTmp);
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length
      if (null === $length) {
        // keep "not given" as null for every element, so that the recursive call
        // replaces up to the end of each string (same as the scalar call below)
        // instead of silently turning the operation into an insert (length 0)
        $length = array_fill(0, $num, null);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (!$valueTmpV2) {
            $valueTmpV2 = 0;
          } else {
            if ((int)$valueTmpV2 !== $valueTmpV2) {
              $valueTmpV2 = $num;
            }
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call, element by element
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // scalar $str: only the first replacement is used
    if (is_array($replacement) === true) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // pure ASCII input: byte offsets equal character offsets, so the native function is safe
    if (self::is_ascii($str)) {
      return ($length === null) ?
          substr_replace($str, $replacement, $offset) :
          substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into single characters and splice on character level
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6870
6871
  /**
   * Strips a trailing $needle from $haystack (case-sensitive).
   *
   * The haystack is handed back untouched when the needle is empty or when
   * "UTF8::str_ends_with()" does not report a match.
   *
   * @param string $haystack <p>The string to remove the suffix from.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix; an empty string for an empty haystack.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix (both lengths come from "UTF8::strlen()")
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $withoutSuffix = self::substr($haystack, 0, $keepLength);

    // a "false" result from "UTF8::substr()" is normalised to an empty string
    return $withoutSuffix === false ? '' : (string)$withoutSuffix;
  }
6902
6903
  /**
   * Inverts the case of every non-whitespace character of the string.
   *
   * A character that is unchanged by "UTF8::strtoupper()" is passed through
   * "UTF8::strtolower()"; every other character is replaced by its upper-case form.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // strip invalid byte sequences first, if the caller asked for it
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $swapCharacter = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = self::strtoupper($char, $encoding);

      // already upper-case (or caseless) => lower it, otherwise take the upper-case form
      return $char === $upper ? self::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapCharacter, $str);
  }
6946 13
6947
  /**
   * Legacy camelCase alias that forwards all arguments to "UTF8::to_ascii()".
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "unknown character" replacement.</p>
   * @param bool   $strict    <p>Passed on as the "strict" flag.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6964
6965
  /**
   * Legacy camelCase alias that forwards to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input, handed over unchanged.</p>
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6980
6981 7
  /**
   * Legacy camelCase alias that forwards to "UTF8::to_latin1()".
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str <p>The input, handed over unchanged.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_latin1()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6996 7
6997
  /**
   * Legacy camelCase alias that forwards to "UTF8::to_utf8()" (html-entity decoding stays off).
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str <p>The input, handed over unchanged.</p>
   *
   * @return string|string[] <p>Whatever "UTF8::to_utf8()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
7012 1
7013
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned as it is. Everything else is cleaned via "UTF8::clean()",
   * optionally transliterated via PHP-Intl and finally mapped character by character with
   * the lookup tables loaded through "UTF8::getData()".
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // lookup tables, cached per "bank" (code point >> 8) for the lifetime of the process
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure,
        // in that case we keep the input and use the table lookup below
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    // split into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // the code point must be decoded anew for every character, otherwise
      // a value from a previous iteration would be re-used for undecodable bytes
      $ord = null;

      if ($ordC0 >= 192 && $ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64 + (ord($c[1]) - 128);
      } elseif ($ordC0 >= 224 && $ordC0 <= 239) {
        // 3 bytes
        $ord = ($ordC0 - 224) * 4096 + (ord($c[1]) - 128) * 64 + (ord($c[2]) - 128);
      } elseif ($ordC0 >= 240 && $ordC0 <= 247) {
        // 4 bytes
        $ord = ($ordC0 - 240) * 262144 + (ord($c[1]) - 128) * 4096 + (ord($c[2]) - 128) * 64 + (ord($c[3]) - 128);
      } elseif ($ordC0 >= 248 && $ordC0 <= 251) {
        // 5 bytes (legacy form)
        $ord = ($ordC0 - 248) * 16777216 + (ord($c[1]) - 128) * 262144 + (ord($c[2]) - 128) * 4096 + (ord($c[3]) - 128) * 64 + (ord($c[4]) - 128);
      } elseif ($ordC0 === 252 || $ordC0 === 253) {
        // 6 bytes (legacy form)
        $ord = ($ordC0 - 252) * 1073741824 + (ord($c[1]) - 128) * 16777216 + (ord($c[2]) - 128) * 262144 + (ord($c[3]) - 128) * 4096 + (ord($c[4]) - 128) * 64 + (ord($c[5]) - 128);
      }

      // stray continuation bytes (128-191) and 254 / 255 cannot be decoded
      if (null === $ord) {
        $c = $unknown;
        continue;
      }

      // load the lookup table for this block of 256 code points on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no mapping available for this character
        $c = $unknown;
      }
    }
    // break the reference, so that a later write to "$c" can't alter "$chars"
    unset($c);

    return implode('', $chars);
  }
7174
7175 22
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively, keys are kept);
   * strings are passed to "UTF8::utf8_decode()".
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
7204 12
7205
  /**
   * Alias that forwards to "UTF8::to_iso8859()".
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str <p>The input, handed over unchanged.</p>
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7218
7219 5
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX", incl. UTF-16 surrogate pairs like "\ud83d\ude00")
   * and, if requested, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively)
    if (is_array($str) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $k => $v) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$k] = self::to_utf8($v, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if (!isset($str[0])) {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "strlen()" is overloaded by mbstring
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $max = \mb_strlen($str, '8BIT');
    } else {
      $max = strlen($str);
    }

    $buf = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $max; $i++) {
      $c1 = $str[$i];

      if ($c1 >= "\xC0") { // should be converted to UTF8, if it's not UTF8 already

        if ($c1 <= "\xDF") { // looks like 2 bytes UTF8

          // a missing byte at the end of the string is treated as "\x00" (=> not a continuation byte)
          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];

          if ($c2 >= "\x80" && $c2 <= "\xBF") { // yeah, almost sure it's UTF8 already
            $buf .= $c1 . $c2;
            $i++;
          } else { // not valid UTF8 - convert it
            $buf .= self::to_utf8_convert($c1);
          }

        } elseif ($c1 >= "\xE0" && $c1 <= "\xEF") { // looks like 3 bytes UTF8

          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
          $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];

          if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF") { // yeah, almost sure it's UTF8 already
            $buf .= $c1 . $c2 . $c3;
            $i += 2;
          } else { // not valid UTF8 - convert it
            $buf .= self::to_utf8_convert($c1);
          }

        } elseif ($c1 >= "\xF0" && $c1 <= "\xF7") { // looks like 4 bytes UTF8

          $c2 = $i + 1 >= $max ? "\x00" : $str[$i + 1];
          $c3 = $i + 2 >= $max ? "\x00" : $str[$i + 2];
          $c4 = $i + 3 >= $max ? "\x00" : $str[$i + 3];

          if ($c2 >= "\x80" && $c2 <= "\xBF" && $c3 >= "\x80" && $c3 <= "\xBF" && $c4 >= "\x80" && $c4 <= "\xBF") { // yeah, almost sure it's UTF8 already
            $buf .= $c1 . $c2 . $c3 . $c4;
            $i += 3;
          } else { // not valid UTF8 - convert it
            $buf .= self::to_utf8_convert($c1);
          }

        } else { // doesn't look like UTF8, but should be converted
          $buf .= self::to_utf8_convert($c1);
        }

      } elseif (($c1 & "\xC0") === "\x80") { // needs conversion

        $buf .= self::to_utf8_convert($c1);

      } else { // it doesn't need conversion
        $buf .= $c1;
      }
    }

    // decode unicode escape sequences:
    // - group 1 + 2: a high surrogate directly followed by a low surrogate, both code units
    //                must be decoded together (as UTF-16) to get the real character
    // - group 3: every other single "\uXXXX" escape
    $buf = preg_replace_callback(
        '/\\\\u(d[89ab][0-9a-f]{2})\\\\u(d[c-f][0-9a-f]{2})|\\\\u([0-9a-f]{4})/i',
        function ($match) {
          if (isset($match[3])) {
            return \mb_convert_encoding(pack('H*', $match[3]), 'UTF-8', 'UCS-2BE');
          }

          return \mb_convert_encoding(pack('H*', $match[1] . $match[2]), 'UTF-8', 'UTF-16BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7336
7337 1
  /**
   * Re-encodes one byte as UTF-8.
   *
   * The byte value is looked up in the Windows-1252 table first; without a table entry
   * a two-byte sequence is built from the byte (upper bits via "UTF8::chr_and_parse_int()",
   * lower six bits masked with 0x3F).
   *
   * @param string $int <p>The byte to convert, as a string: despite its name the value is passed
   *                    to "ord()" and used in string bit-operations.</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int)
  {
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return (string)self::$WIN1252_TO_UTF8[$byteValue];
    }

    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $trailByte = ($int & "\x3F") | "\x80";

    return $leadByte . $trailByte;
  }
7357
7358
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We could only use the native function for strings / chars that are 7-Bit only,
   * but the check for ASCII (7-Bit) costs more time than we could save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped; without (or with an
   *                      empty / falsy value) the unicode categories "Z" and "C" are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // explicit characters given: trim the left side first, then the right side
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7386
7387 7
  /**
   * Makes string's first char uppercase.
   *
   * The string is split into its first character and the rest via "UTF8::substr()";
   * only the first character goes through "UTF8::strtoupper()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // strip invalid byte sequences first, if the caller asked for it
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything after the first character stays as it is
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
7417
7418
  /**
   * Alias that forwards all arguments to "UTF8::ucfirst()".
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7433 1
7434
  /**
   * Uppercase for all words in the string.
   *
   * Pure ASCII input without exceptions and without an extra charlist is handled by the
   * native "ucwords()"; everything else is split via "UTF8::str_to_words()" and every
   * part that is not listed in $exceptions goes through "UTF8::ucfirst()".
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    // check the length, not the truthiness: the string "0" is a valid input
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences, before we split the string
      $str = self::clean($str);
    }

    // the native function knows neither exceptions nor additional word-characters
    // (compared as string, so that a charlist / exception of "0" isn't ignored)
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty parts only, a part like "0" must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7502
7503
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" sequences as hexadecimal html-entities, they get decoded in the loop below
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    // "ENT_HTML5" is only referenced when the version check passes
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // one decoding pass per iteration; repeated until nothing changes anymore
    // (or only once, if "$multi_decode" is off)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
7553
7554
  /**
   * Returns a lookup table that maps the percent-encoded bytes "%20" .. "%FF",
   * read as Windows-1252, to the matching UTF-8 characters.
   *
   * "%80" maps to the euro sign, which is what byte 0x80 means in Windows-1252
   * (self::$WIN1252_TO_UTF8 maps 128 to the same character).
   *
   * Several keys ("%7F", "%81", "%8D", "%8F", "%90", "%9D", "%A0", "%AD") map to an empty string.
   * NOTE(review): "%A0" (no-break space) and "%AD" (soft hyphen) are defined in Windows-1252;
   * confirm that mapping them to '' is intended.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Percent-encoded sequence (key) => decoded character (value).</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => '',
        '%80' => '€', // 0x80 is the EURO SIGN in Windows-1252
        '%81' => '',
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '',
        '%8E' => 'Ž',
        '%8F' => '',
        '%90' => '',
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '',
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => '',
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => '',
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7790 6
7791 1
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through self::to_utf8() and the self::$UTF8_TO_WIN1252
   * replacement table, then it is folded byte by byte, in place, into a single-byte string:
   * two-byte sequences whose code point is below 256 become one byte, every other
   * multi-byte sequence becomes "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the key / value lists of the replacement table are built once per request
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length, even when "mbstring.func_overload" replaces strlen()
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    // $read walks the UTF-8 bytes, $write is the position of the next output byte;
    // $write never gets ahead of $read, so the string can be rewritten in place
    $write = 0;
    for ($read = 0; $read < $byteCount; ++$read, ++$write) {
      $leadBits = $str[$read] & "\xF0";

      if ($leadBits === "\xC0" || $leadBits === "\xD0") {
        // two-byte sequence: 5 payload bits from the lead byte, 6 from the next byte
        $codePoint = (ord($str[$read] & "\x1F") << 6) | ord($str[++$read] & "\x3F");
        $str[$write] = $codePoint < 256 ? self::chr_and_parse_int($codePoint) : '?';
      } elseif ($leadBits === "\xF0") {
        // four-byte sequence: emit "?" and skip the three following bytes
        $str[$write] = '?';
        $read += 3;
      } elseif ($leadBits === "\xE0") {
        // three-byte sequence: emit "?" and skip the two following bytes
        $str[$write] = '?';
        $read += 2;
      } else {
        // everything else is copied through unchanged
        $str[$write] = $str[$read];
      }
    }

    // cut off the no longer used tail of the buffer
    return (string)self::substr($str, 0, $write, '8BIT');
  }
7854
7855
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native "utf8_encode()" call, the self::$CP1252_TO_UTF8 replacement table
   * is applied, but only when the encoded string contains a 0xC2 byte.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);
    if ($encoded === false) {
      return '';
    }

    $str = (string)$encoded;

    if (strpos($str, "\xC2") !== false) {
      // the key / value lists of the replacement table are built once per request
      static $CP1252_TO_UTF8_KEYS_CACHE = null;
      static $CP1252_TO_UTF8_VALUES_CACHE = null;

      if ($CP1252_TO_UTF8_KEYS_CACHE === null) {
        $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
        $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
      }

      return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $str);
    }

    return $str;
  }
7891 8
7892 8
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7905 8
7906
  /**
   * Returns the table of all utf8 whitespace characters.
   *
   * @see    http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author Derek E.
   *
   * @return array <p>
   *               All known whitespace characters as values, with the type of whitespace
   *               (as defined in the URL above) as keys.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7922 6
7923
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. When the string holds more than
   * $limit words, it is cut after the last allowed word, right-trimmed and $strAddOn
   * is appended; otherwise the string is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped string.</p>
   *
   * @return string <p>An empty string is returned for empty input or a limit below 1.</p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $maxWords = (int)$limit;

    if ($maxWords < 1) {
      return '';
    }

    // leading whitespace plus up to $maxWords words (possessive quantifiers, no backtracking)
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $maxWords . '}/u';
    preg_match($pattern, $str, $found);

    // the pattern is anchored at the start, so a shorter match means something was cut off
    if (
        isset($found[0])
        &&
        self::strlen($str) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
7959
7960
  /**
   * Wraps a string to a given number of characters.
   *
   * The string is turned into a one-byte-per-character "mask" ("?" for a character,
   * " " for a space, "#" for an existing break), the native "wordwrap()" is run on
   * that mask, and the break positions it produces are mapped back onto the real
   * characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>
   *                The given string wrapped at the specified column,
   *                or an empty string if $str or $break is empty.
   *                </p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars holds the real characters (and breaks), $mask one placeholder byte per entry
    $mask = '';
    $chars = array();

    $segments = explode($break, $str);
    $segmentCount = count($segments);

    /** @noinspection ForeachInvariantsInspection */
    for ($n = 0; $n < $segmentCount; ++$n) {

      // re-insert the break that "explode()" removed between two segments
      if ($n) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$n];
      unset($segments[$n]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;

        if (' ' === $char) {
          $mask .= ' ';
        } else {
          $mask .= '?';
        }
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (false !== $breakPos = self::strpos($mask, '#', $breakPos + 1)) {

      // copy everything in front of the current break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // drop the character at the break position if it is an original break or a space
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
8027
8028
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8037
8038
}
8039