Completed
Push — master ( 924fd9...89c05d )
by Lars
06:06
created

UTF8::html_entity_decode()   C

Complexity

Conditions 15
Paths 15

Size

Total Lines 75
Code Lines 43

Duplication

Lines 9
Ratio 12 %

Code Coverage

Tests 40
CRAP Score 15.1689

Importance

Changes 0
Metric Value
dl 9
loc 75
ccs 40
cts 44
cp 0.9091
rs 5.3122
c 0
b 0
f 0
cc 15
eloc 43
nc 15
nop 3
crap 15.1689

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Constructor: makes sure the environment feature detection has run.
   *
   * Delegates to UTF8::checkForSupport(), which fills the static $SUPPORT
   * cache on its first call and is a no-op on later calls.
   */
  public function __construct()
  {
    self::checkForSupport();
  }
820
821
  /**
   * Return the character at the specified position: $str[1] like functionality.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of character to return (handed to UTF8::substr() as start offset).</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string if $str is empty or $pos is negative.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;

    // nothing to pick from an empty string
    if (!isset($str[0])) {
      return '';
    }

    $pos = (int)$pos;

    // negative positions are not supported here
    if ($pos < 0) {
      return '';
    }

    return (string)self::substr($str, $pos, 1);
  }
845
846
  /**
   * Prepends UTF-8 BOM character to the string and returns the whole string.
   *
   * INFO: If UTF8::string_has_bom() already reports a BOM, the input string is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    if (self::string_has_bom($str) === false) {
      $str = self::bom() . $str;
    }

    return $str;
  }
863
864
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian bit stream: the input is left-padded
   * with zeros to a whole number of bytes and every group of 8 bits becomes one
   * byte of the result. Each byte is converted on its own, so the result does
   * not suffer from the precision loss of base_convert() on long inputs, and
   * leading zero bytes are kept.
   *
   * @param mixed $bin <p>String consisting of the characters 1|0.</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty / non-string-like input.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // Ignore everything that is not a binary digit (base_convert() skipped
    // such characters as well).
    $bits = (string)preg_replace('/[^01]/', '', (string)$bin);
    if ($bits === '') {
      return '';
    }

    // Left-pad to a multiple of 8 so that the last digit stays the least
    // significant bit of the last byte.
    $remainder = strlen($bits) % 8;
    if ($remainder !== 0) {
      $bits = str_repeat('0', 8 - $remainder) . $bits;
    }

    $str = '';
    foreach (str_split($bits, 8) as $byteBits) {
      $str .= chr((int)bindec($byteBits));
    }

    return $str;
  }
879
880
  /**
   * Returns the UTF-8 Byte Order Mark Character.
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string <p>The three bytes 0xEF 0xBB 0xBF (UTF-8 Byte Order Mark).</p>
   */
  public static function bom()
  {
    return "\xef\xbb\xbf";
  }
891
892
  /**
   * Applies a callback to all characters of a string.
   *
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback, as returned by UTF8::chr_map().</p>
   */
  public static function callback($callback, $str)
  {
    return self::chr_map($callback, $str);
  }
906
907
  /**
   * This method will auto-detect your server environment for UTF-8 support.
   *
   * The result is stored in the static $SUPPORT array under the keys
   * "mbstring", "mbstring_func_overload", "iconv", "intl",
   * "intl__transliterator_list_ids", "intlChar" and "pcre_utf8". The key
   * "already_checked_via_portable_utf8" marks the detection as done, so the
   * checks run only once per request.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {

      self::$SUPPORT['already_checked_via_portable_utf8'] = true;

      // http://php.net/manual/en/book.mbstring.php
      self::$SUPPORT['mbstring'] = self::mbstring_loaded();

      // is the string-function overloading of mbstring switched on?
      if (
          defined('MB_OVERLOAD_STRING')
          &&
          ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING
      ) {
        self::$SUPPORT['mbstring_func_overload'] = true;
      } else {
        self::$SUPPORT['mbstring_func_overload'] = false;
      }

      // http://php.net/manual/en/book.iconv.php
      self::$SUPPORT['iconv'] = self::iconv_loaded();

      // http://php.net/manual/en/book.intl.php
      self::$SUPPORT['intl'] = self::intl_loaded();

      // the list of transliterator ids stays empty unless "intl" provides it
      self::$SUPPORT['intl__transliterator_list_ids'] = array();
      if (
          self::$SUPPORT['intl'] === true
          &&
          function_exists('transliterator_list_ids') === true
      ) {
        self::$SUPPORT['intl__transliterator_list_ids'] = transliterator_list_ids();
      }

      // http://php.net/manual/en/class.intlchar.php
      self::$SUPPORT['intlChar'] = self::intlChar_loaded();

      // http://php.net/manual/en/book.pcre.php
      self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
    }
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are memoized per "code point + encoding" in a function-local
   * static cache. If "\IntlChar" is available it is used for the conversion,
   * otherwise the UTF-8 bytes are assembled by hand. The hand-written path
   * only accepts integers in the Unicode range 0 .. 0x10FFFF.
   *
   * A E_USER_WARNING is triggered if a target encoding other than UTF-8 or
   * WINDOWS-1252 is requested while mbstring is not available.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $str = \IntlChar::chr($code_point);

      // keep a failure (null) as it is, instead of turning it into a string
      if ($str !== null && $encoding !== 'UTF-8') {
        $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
      }

      $CHAR_CACHE[$cacheKey] = $str;

      return $str;
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    if ((int)$code_point !== $code_point) {
      $CHAR_CACHE[$cacheKey] = null;

      return null;
    }

    // values outside of the Unicode range cannot be encoded as UTF-8,
    // the bit-shifting below would only produce invalid bytes for them
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1033
1034
  /**
   * Casts the given value to int and returns the single byte with that ordinal.
   *
   * @param int $int <p>The byte value; it is cast to int before it is handed to chr().</p>
   *
   * @return string <p>A one-byte string.</p>
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1043
1044
  /**
   * Runs a callback over every character of a string and collects the results.
   *
   * @param string|array $callback <p>The callback, in any form accepted by array_map().</p>
   * @param string       $str      <p>UTF-8 string whose characters are passed to the callback.</p>
   *
   * @return array <p>One callback result per element returned by UTF8::split().</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1058
1059
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The byte length of each character; an empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    $sizes = array();
    foreach (self::split($str) as $index => $character) {
      // "8BIT" makes strlen() count raw bytes instead of characters
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
1086
1087
  /**
   * Get a decimal code representation of a specific character.
   *
   * Only the first UTF-8 sequence of the input is decoded: the lead byte decides
   * how many continuation bytes are folded into the result.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded code point, or 0 if the input is an empty string.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // an empty string has no lead byte: bail out instead of reading an undefined string offset
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx: drop the continuation marker bit and append the payload bits
      $code = ($code << 6) + (self::ord($char[$i - 1]) & ~0x80);
    }

    return $code;
  }
1126
1127
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character.</p>
   * @param string $pfix [optional] <p>The prefix that is passed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point formatted by UTF8::int_to_hex(), or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if (!isset($char[0])) {
      return '';
    }

    // the literal entity "&#0;" is handed to ord() as an empty string
    $input = ($char === '&#0;') ? '' : $char;
    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
1149
1150
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the same input.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1163
1164
  /**
   * Splits a string into chunks and joins them again with the given line ending.
   *
   * The separator is placed between the chunks (the chunks are joined via implode()),
   * so nothing is appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string.</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1177
1178
  /**
   * Removes all non-UTF-8 byte sequences from a string, plus optional extra clean-up steps.
   *
   * The steps run in a fixed order: strip invalid bytes, drop the replacement character,
   * remove invisible characters, then (if requested) normalize whitespace, normalize
   * MS Word characters and finally remove the BOM.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove the UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars,
   *                                        e.g.: "…" => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces; only used in
   *                                        combination with $normalize_whitespace.</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; stray bytes are matched by the
    // other two groups and vanish because only "$1" is written back.
    $pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $cleaned = preg_replace($pattern, '$1', $str);
    $cleaned = self::replace_diamond_question_mark($cleaned, '');
    $cleaned = self::remove_invisible_characters($cleaned);

    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1226
1227
  /**
   * Cleans up a string so that only printable UTF-8 chars remain, and fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // fix ISO <-> UTF-8 errors first
    $fixed = self::fix_simple_utf8($str);

    // clean() is called with: remove BOM, normalize whitespace,
    // do not normalize MS Word chars, keep non-breaking spaces
    // (it also strips non-UTF-8 bytes, the diamond question mark and invisible characters)
    return (string)self::clean($fixed, true, true, false, true);
  }
1254
1255
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg     <p>A UTF-8 encoded string (it is split into characters first)
   *                                 or an array whose elements are passed to UTF8::ord() as they are.</p>
   * @param bool            $u_style <p>If true, the code points are additionally converted via
   *                                 UTF8::int_to_hex(); by default they are returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $chars = (is_string($arg) === true) ? self::split($arg) : $arg;

    $codePoints = array_map(array('\\voku\\helper\\UTF8', 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(array('\\voku\\helper\\UTF8', 'int_to_hex'), $codePoints);
  }
1292
1293
  /**
   * Counts how often each character occurs in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string (passed on to UTF8::split()).</p>
   *
   * @return array <p>An associative array with the characters as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1306
1307
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") which is then decoded
   * via UTF8::html_entity_decode().
   *
   * @param mixed $int <p>The decimal code point.</p>
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1324
1325
  /**
   * Encodes a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call it on a string that is already UTF-8 without messing it up.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string.</p>
   * @param bool   $force    [optional] <p>If true, the conversion runs even when the detected encoding already
   *                         equals the target encoding (we try to fix broken / double encoding for UTF-8);<br>
   *                         otherwise the string is returned unchanged in that case.</p>
   *
   * @return string <p>The converted string, or the input string if no conversion was done.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do without a string or without a target encoding
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // detection failed: leave the string alone
    if ($detected === false) {
      return $str;
    }

    // already in the target encoding and no conversion was forced
    if ($force !== true && $detected === $encoding) {
      return $str;
    }

    // the dedicated converters are used when forced, or when the source is one of these encodings
    $useDedicatedConverter = $force === true
                             || in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true);

    if ($useDedicatedConverter) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // a falsy conversion result falls back to the input string
    return $converted ? $converted : $str;
  }
1416
1417
  /**
   * Reads entire file into a string.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The name is passed through
   *                                     filter_var(..., FILTER_SANITIZE_STRING) before it is used.
   *                                     </p>
   * @param int|false     $flags         [optional] <p>
   *                                     Prior to PHP 6, this parameter is called
   *                                     use_include_path and is a bool.
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
   *                                     to trigger include path search.
   *                                     </p>
   *                                     <p>
   *                                     The value of flags can be any combination of
   *                                     the following flags (with some restrictions), joined with the
   *                                     binary OR (|) operator.
   *                                     </p>
   *                                     <p>
   *                                     Available flags:
   *                                     <table>
   *                                     <tr valign="top">
   *                                     <td>Flag</td>
   *                                     <td>Description</td>
   *                                     </tr>
   *                                     <tr valign="top">
   *                                     <td>FILE_USE_INCLUDE_PATH</td>
   *                                     <td>
   *                                     Search for filename in the include directory.
   *                                     See include_path for more information.
   *                                     </td>
   *                                     </tr>
   *                                     <tr valign="top">
   *                                     <td>FILE_TEXT</td>
   *                                     <td>
   *                                     As of PHP 6, the default encoding of the read
   *                                     data is UTF-8. You can specify a different encoding by creating a
   *                                     custom context or by changing the default using
   *                                     stream_default_encoding. This flag cannot be
   *                                     used with FILE_BINARY.
   *                                     </td>
   *                                     </tr>
   *                                     <tr valign="top">
   *                                     <td>FILE_BINARY</td>
   *                                     <td>
   *                                     With this flag, the file is read in binary mode. This is the default
   *                                     setting and cannot be used with FILE_TEXT.
   *                                     </td>
   *                                     </tr>
   *                                     </table>
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with
   *                                     stream_context_create. If it is null and $timeout is non-zero,
   *                                     a context with that HTTP timeout is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts (null is treated as 0).
   *                                     </p>
   * @param int|null      $maxLength     [optional] <p>
   *                                     Maximum length of data read. The default is to read until end
   *                                     of file is reached; only integer values are passed on.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for
   *                                     e.g. images or pdf, because they use non default utf-8 chars.</p>
   *
   * @return string|false <p>The read data, or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING strips tags and encodes quotes, so file names
    // containing such characters are altered here - confirm this is intended.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() returns false when the filter fails; without a usable name there is
    // nothing to read, so report the failure instead of passing it to file_get_contents()
    if ($filename === false || $filename === '') {
      return false;
    }

    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    if (!$flags) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    // only forward $maxLength when it really is an integer
    if (is_int($maxLength) === true) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxLength);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1542
1543
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // a failed read yields false, not a string: there is nothing to inspect
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1554
1555
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively, element by element; strings are
   * normalized; every other type is returned untouched.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form passed to \Normalizer (default: 4, i.e. NFC).</p>
   * @param string $leading_combining  <p>Prepended to a string that would otherwise start with
   *                                   a combining mark (\p{Mn}).</p>
   *
   * @return mixed <p>The filtered value.</p>
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      if (isset($n[0])) {
        $var = $n;
      } else {
        // normalization gave no usable result: fall back to a forced UTF-8 encode
        $var = self::encode('UTF-8', $var, true);
      }
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               &&
                               isset($n[0], $leading_combining[0])
                               &&
                               preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1618
1619
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it.
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              It is only forwarded when the method is called with four arguments.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * The result is passed through UTF8::filter() before it is returned.
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    $var = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($var);
  }
1659
1660
  /**
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables and optionally filters them.
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails. The result is passed through UTF8::filter() before it is returned.
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // $definition and $add_empty are only forwarded when the caller supplied at least two arguments
    $a = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($a);
  }
1707
1708
  /**
1709
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1710
   *
1711
   * Filters a variable with a specified filter
1712
   *
1713
   * @link  http://php.net/manual/en/function.filter-var.php
1714
   *
1715
   * @param mixed $variable <p>
1716
   *                        Value to filter.
1717
   *                        </p>
1718
   * @param int   $filter   [optional] <p>
1719
   *                        The ID of the filter to apply. The
1720
   *                        manual page lists the available filters.
1721
   *                        </p>
1722
   * @param mixed $options  [optional] <p>
1723
   *                        Associative array of options or bitwise disjunction of flags. If filter
1724
   *                        accepts options, flags can be provided in "flags" field of array. For
1725
   *                        the "callback" filter, callable type should be passed. The
1726
   *                        callback must accept one argument, the value to be filtered, and return
1727
   *                        the value after filtering/sanitizing it.
1728
   *                        </p>
1729
   *                        <p>
1730
   *                        <code>
1731
   *                        // for filters that accept options, use this format
1732
   *                        $options = array(
1733
   *                        'options' => array(
1734
   *                        'default' => 3, // value to return if the filter fails
1735
   *                        // other options here
1736
   *                        'min_range' => 0
1737
   *                        ),
1738
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1739
   *                        );
1740
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1741
   *                        // for filter that only accept flags, you can pass them directly
1742
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1743
   *                        // for filter that only accept flags, you can also pass as an array
1744
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1745
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1746
   *                        // callback validate filter
1747
   *                        function foo($value)
1748
   *                        {
1749
   *                        // Expected format: Surname, GivenNames
1750
   *                        if (strpos($value, ", ") === false) return false;
1751
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1752
   *                        $empty = (empty($surname) || empty($givennames));
1753
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1754
   *                        if ($empty || $notstrings) {
1755
   *                        return false;
1756
   *                        } else {
1757
   *                        return $value;
1758
   *                        }
1759
   *                        }
1760
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1761
   *                        </code>
1762
   *                        </p>
1763
   *
1764
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1765
   * @since 5.2.0
1766
   */
1767 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options only when the caller passed a third argument, so the
    // native function keeps using its own default otherwise.
    $filtered = (func_num_args() < 3)
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // The native result is passed through self::filter() before it is returned.
    return self::filter($filtered);
  }
1777
1778
  /**
1779
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1780
   *
1781
   * Gets multiple variables and optionally filters them
1782
   *
1783
   * @link  http://php.net/manual/en/function.filter-var-array.php
1784
   *
1785
   * @param array $data       <p>
1786
   *                          An array with string keys containing the data to filter.
1787
   *                          </p>
1788
   * @param mixed $definition [optional] <p>
1789
   *                          An array defining the arguments. A valid key is a string
1790
   *                          containing a variable name and a valid value is either a
1791
   *                          filter type, or an
1792
   *                          array optionally specifying the filter, flags and options.
1793
   *                          If the value is an array, valid keys are filter
1794
   *                          which specifies the filter type,
1795
   *                          flags which specifies any flags that apply to the
1796
   *                          filter, and options which specifies any options that
1797
   *                          apply to the filter. See the example below for a better understanding.
1798
   *                          </p>
1799
   *                          <p>
1800
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1801
   *                          input array are filtered by this filter.
1802
   *                          </p>
1803
   * @param bool  $add_empty  [optional] <p>
1804
   *                          Add missing keys as <b>NULL</b> to the return value.
1805
   *                          </p>
1806
   *
1807
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1808
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1809
   * the variable is not set.
1810
   * @since 5.2.0
1811
   */
1812 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults apply; otherwise both
    // $definition and $add_empty are forwarded as received.
    $filtered = (func_num_args() < 2)
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // The native result is passed through self::filter() before it is returned.
    return self::filter($filtered);
  }
1822
1823
  /**
1824
   * Check if the number of unicode characters is not more than the specified integer.
1825
   *
1826
   * @param string $str      The original string to be checked.
1827
   * @param int    $box_size The size in number of chars to be checked against string.
1828
   *
1829
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1830
   */
1831 1
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared against the box size.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1835
1836
  /**
1837
   * Try to fix simple broken UTF-8 strings.
1838
   *
1839
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1840
   *
1841
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1842
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1843
   * See: http://en.wikipedia.org/wiki/Windows-1252
1844
   *
1845
   * @param string $str <p>The input string</p>
1846
   *
1847
   * @return string
1848
   */
1849 26 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    // Nothing to repair in an empty string.
    if ($str === '') {
      return '';
    }

    // The replacement map is split into search/replace lists only once;
    // every later call reuses the cached lists.
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      $searchCache = array_keys(self::$BROKEN_UTF8_FIX);
      $replaceCache = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($searchCache, $replaceCache, $str);
  }
1868
1869
  /**
1870
   * Fix a double (or multiple) encoded UTF8 string.
1871
   *
1872
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1873
   *
1874
   * @return string|string[] <p>Will return the fixed input-"array" or
1875
   *                         the fixed input-"string".</p>
1876
   */
1877 1
  public static function fix_utf8($str)
  {
    // Arrays are repaired element by element (recursively); keys and their
    // order are kept.
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Repeat the decode/encode round trip until it no longer changes the value.
    $previous = '';
    while ($previous !== $str) {
      $previous = $str;
      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1901
1902
  /**
1903
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1904
   *
1905
   * @param string $char
1906
   *
1907
   * @return string <p>'RTL' or 'LTR'</p>
1908
   */
1909 1
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes taken from the "IntlChar"-Class
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // Any other code falls through to the code point tables below.
    }

    $c = self::chr_to_decimal($char);

    // Everything outside of [0x5be, 0x10b7f] is treated as left-to-right.
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    // Table entries: a plain integer is a single code point (strict comparison),
    // a two element array is an inclusive range array(first, last).
    static $lowerTable = array(
        0x5be,
        0x5c0,
        0x5c3,
        0x5c6,
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        0x608,
        0x60b,
        0x60d,
        0x61b,
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        0x710,
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        0x7b1,
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        0x7fa,
        array(0x800, 0x815),
        0x81a,
        0x824,
        0x828,
        array(0x830, 0x83e),
        array(0x840, 0x858),
        0x85e,
    );

    static $upperTable = array(
        0xfb1d,
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        0xfb3e,
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        0x10808,
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        0x1083c,
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        0x1093f,
        0x10a00,
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    // Pick the table that can contain $c; 0x200f is handled on its own.
    if (0x85e >= $c) {
      $table = $lowerTable;
    } elseif (0x200f === $c) {
      return 'RTL';
    } elseif (0xfb1d <= $c) {
      $table = $upperTable;
    } else {
      return 'LTR';
    }

    foreach ($table as $entry) {
      if (is_array($entry)) {
        if ($entry[0] <= $c && $entry[1] >= $c) {
          return 'RTL';
        }
      } elseif ($entry === $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2023
2024
  /**
2025
   * Get data from "/data/*.php".
2026
   *
2027
   * @param string $file
2028
   *
2029
   * @return bool|string|array|int <p>Will return false on error.</p>
2030
   */
2031 4
  private static function getData($file)
  {
    // Resolve the short name to "<this directory>/data/<name>.php".
    $file = __DIR__ . '/data/' . $file . '.php';

    // A missing data file is reported as false.
    if (!file_exists($file)) {
      return false;
    }

    // Whatever the data file returns is handed back to the caller.
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2041
2042
  /**
2043
   * Check for php-support.
2044
   *
2045
   * @param string|null $key
2046
   *
2047
   * @return mixed <p>Return the full support-"array", if $key === null<br>
2048
   *               return bool-value, if $key is used and available<br>
2049
   *               otherwise return null</p>
2050
   */
2051 7
  public static function getSupportInfo($key = null)
  {
    // Run the support check if its marker key is not set yet.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without a key the complete support-"array" is returned.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown keys yield null.
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2067
2068
  /**
2069
   * alias for "UTF8::string_has_bom()"
2070
   *
2071
   * @see UTF8::string_has_bom()
2072
   *
2073
   * @param string $str
2074
   *
2075
   * @return bool
2076
   *
2077
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2078
   */
2079
  public static function hasBom($str)
  {
    // Deprecated alias: the work is done by string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2083
2084
  /**
2085
   * Converts a hexadecimal-value into an UTF-8 character.
2086
   *
2087
   * @param string $hexdec <p>The hexadecimal value.</p>
2088
   *
2089
   * @return string|false <p>One single UTF-8 character.</p>
2090
   */
2091 2
  public static function hex_to_chr($hexdec)
  {
    // Hexadecimal string -> decimal code point -> character.
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2095
2096
  /**
2097
   * Converts hexadecimal U+xxxx code point representation to integer.
2098
   *
2099
   * INFO: opposite to UTF8::int_to_hex()
2100
   *
2101
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2102
   *
2103
   * @return int|false <p>The code point, or false on failure.</p>
2104
   */
2105 1
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    // An empty input is a failure.
    if ($hexDec === '') {
      return false;
    }

    // Accepted: an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal
    // digits. The digit class is restricted to [0-9a-f] (case-insensitive):
    // with the former [a-z0-9] class, inputs such as "zzzz" or "12gh" matched
    // and intval() silently turned them into 0 / 18 instead of reporting failure.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2119
2120
  /**
2121
   * alias for "UTF8::html_entity_decode()"
2122
   *
2123
   * @see UTF8::html_entity_decode()
2124
   *
2125
   * @param string $str
2126
   * @param int    $flags
2127
   * @param string $encoding
2128
   *
2129
   * @return string
2130
   */
2131 1
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Plain alias: all arguments are forwarded unchanged.
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2135
2136
  /**
2137
   * Converts a UTF-8 string to a series of HTML numbered entities.
2138
   *
2139
   * INFO: opposite to UTF8::html_decode()
2140
   *
2141
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2142
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2143
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2144
   *
2145
   * @return string <p>HTML numbered entities.</p>
2146
   */
2147 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // Keeping ASCII means the conversion starts at 0x80 instead of 0x00.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A conversion map is built from groups of exactly four integers:
      // (start code, end code, offset, mask). The former map carried a stray
      // fifth element, which PHP >= 8.0 rejects with a ValueError
      // ("must have a multiple of 4 elements").
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string piece by piece, using the
    // parts returned by self::split().
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2185
2186
  /**
2187
   * UTF-8 version of html_entity_decode()
2188
   *
2189
   * The reason we are not using html_entity_decode() by itself is because
2190
   * while it is not technically correct to leave out the semicolon
2191
   * at the end of an entity most browsers will still interpret the entity
2192
   * correctly. html_entity_decode() does not convert entities without
2193
   * semicolons, so we are left with our own little solution here. Bummer.
2194
   *
2195
   * Convert all HTML entities to their applicable characters
2196
   *
2197
   * INFO: opposite to UTF8::html_encode()
2198
   *
2199
   * @link http://php.net/manual/en/function.html-entity-decode.php
2200
   *
2201
   * @param string $str      <p>
2202
   *                         The input string.
2203
   *                         </p>
2204
   * @param int    $flags    [optional] <p>
2205
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2206
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2207
   *                         <table>
2208
   *                         Available <i>flags</i> constants
2209
   *                         <tr valign="top">
2210
   *                         <td>Constant Name</td>
2211
   *                         <td>Description</td>
2212
   *                         </tr>
2213
   *                         <tr valign="top">
2214
   *                         <td><b>ENT_COMPAT</b></td>
2215
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2216
   *                         </tr>
2217
   *                         <tr valign="top">
2218
   *                         <td><b>ENT_QUOTES</b></td>
2219
   *                         <td>Will convert both double and single quotes.</td>
2220
   *                         </tr>
2221
   *                         <tr valign="top">
2222
   *                         <td><b>ENT_NOQUOTES</b></td>
2223
   *                         <td>Will leave both double and single quotes unconverted.</td>
2224
   *                         </tr>
2225
   *                         <tr valign="top">
2226
   *                         <td><b>ENT_HTML401</b></td>
2227
   *                         <td>
2228
   *                         Handle code as HTML 4.01.
2229
   *                         </td>
2230
   *                         </tr>
2231
   *                         <tr valign="top">
2232
   *                         <td><b>ENT_XML1</b></td>
2233
   *                         <td>
2234
   *                         Handle code as XML 1.
2235
   *                         </td>
2236
   *                         </tr>
2237
   *                         <tr valign="top">
2238
   *                         <td><b>ENT_XHTML</b></td>
2239
   *                         <td>
2240
   *                         Handle code as XHTML.
2241
   *                         </td>
2242
   *                         </tr>
2243
   *                         <tr valign="top">
2244
   *                         <td><b>ENT_HTML5</b></td>
2245
   *                         <td>
2246
   *                         Handle code as HTML 5.
2247
   *                         </td>
2248
   *                         </tr>
2249
   *                         </table>
2250
   *                         </p>
2251
   * @param string $encoding [optional] <p>Encoding to use.</p>
2252
   *
2253
   * @return string <p>The decoded string.</p>
2254
   */
2255 16
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Strings shorter than four bytes are returned untouched, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // Quick exit: without "&", or with neither "&#" nor ";", the input is
    // returned as it is.
    $hasAmpersand = strpos($str, '&') !== false;
    $hasNumericPrefix = strpos($str, '&#') !== false;
    $hasSemicolon = strpos($str, ';') !== false;

    if (!$hasAmpersand || (!$hasNumericPrefix && !$hasSemicolon)) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Default flags depend on the result of the Bootup::is_php('5.4') check.
    if ($flags === null) {
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    $isOtherEncoding = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($isOtherEncoding && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Converts one decimal entity; an entity that decodes to a single or
    // double quote is left in its entity form here.
    $decodeDecimalEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');
      $isQuote = ($decoded === '"' || $decoded === "'");

      return $isQuote ? $matches[0] : $decoded;
    };

    // Decode repeatedly until a pass no longer changes the string.
    do {
      $previous = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeDecimalEntity, $str);

      // decode numeric & UTF16 two byte entities: first append the missing
      // semicolon to numeric entities, then let the native function decode.
      $withSemicolons = preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str);
      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($previous !== $str);

    return $str;
  }
2330
2331
  /**
2332
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2333
   *
2334
   * @link http://php.net/manual/en/function.htmlentities.php
2335
   *
2336
   * @param string $str           <p>
2337
   *                              The input string.
2338
   *                              </p>
2339
   * @param int    $flags         [optional] <p>
2340
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2341
   *                              invalid code unit sequences and the used document type. The default is
2342
   *                              ENT_COMPAT | ENT_HTML401.
2343
   *                              <table>
2344
   *                              Available <i>flags</i> constants
2345
   *                              <tr valign="top">
2346
   *                              <td>Constant Name</td>
2347
   *                              <td>Description</td>
2348
   *                              </tr>
2349
   *                              <tr valign="top">
2350
   *                              <td><b>ENT_COMPAT</b></td>
2351
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2352
   *                              </tr>
2353
   *                              <tr valign="top">
2354
   *                              <td><b>ENT_QUOTES</b></td>
2355
   *                              <td>Will convert both double and single quotes.</td>
2356
   *                              </tr>
2357
   *                              <tr valign="top">
2358
   *                              <td><b>ENT_NOQUOTES</b></td>
2359
   *                              <td>Will leave both double and single quotes unconverted.</td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_IGNORE</b></td>
2363
   *                              <td>
2364
   *                              Silently discard invalid code unit sequences instead of returning
2365
   *                              an empty string. Using this flag is discouraged as it
2366
   *                              may have security implications.
2367
   *                              </td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2371
   *                              <td>
2372
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2373
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2374
   *                              </td>
2375
   *                              </tr>
2376
   *                              <tr valign="top">
2377
   *                              <td><b>ENT_DISALLOWED</b></td>
2378
   *                              <td>
2379
   *                              Replace invalid code points for the given document type with a
2380
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2381
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2382
   *                              instance, to ensure the well-formedness of XML documents with
2383
   *                              embedded external content.
2384
   *                              </td>
2385
   *                              </tr>
2386
   *                              <tr valign="top">
2387
   *                              <td><b>ENT_HTML401</b></td>
2388
   *                              <td>
2389
   *                              Handle code as HTML 4.01.
2390
   *                              </td>
2391
   *                              </tr>
2392
   *                              <tr valign="top">
2393
   *                              <td><b>ENT_XML1</b></td>
2394
   *                              <td>
2395
   *                              Handle code as XML 1.
2396
   *                              </td>
2397
   *                              </tr>
2398
   *                              <tr valign="top">
2399
   *                              <td><b>ENT_XHTML</b></td>
2400
   *                              <td>
2401
   *                              Handle code as XHTML.
2402
   *                              </td>
2403
   *                              </tr>
2404
   *                              <tr valign="top">
2405
   *                              <td><b>ENT_HTML5</b></td>
2406
   *                              <td>
2407
   *                              Handle code as HTML 5.
2408
   *                              </td>
2409
   *                              </tr>
2410
   *                              </table>
2411
   *                              </p>
2412
   * @param string $encoding      [optional] <p>
2413
   *                              Like <b>htmlspecialchars</b>,
2414
   *                              <b>htmlentities</b> takes an optional third argument
2415
   *                              <i>encoding</i> which defines encoding used in
2416
   *                              conversion.
2417
   *                              Although this argument is technically optional, you are highly
2418
   *                              encouraged to specify the correct value for your code.
2419
   *                              </p>
2420
   * @param bool   $double_encode [optional] <p>
2421
   *                              When <i>double_encode</i> is turned off PHP will not
2422
   *                              encode existing html entities. The default is to convert everything.
2423
   *                              </p>
2424
   *
2425
   *
2426
   * @return string the encoded string.
2427
   * </p>
2428
   * <p>
2429
   * If the input <i>string</i> contains an invalid code unit
2430
   * sequence within the given <i>encoding</i> an empty string
2431
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2432
   * <b>ENT_SUBSTITUTE</b> flags are set.
2433
   */
2434 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * PHP doesn't replace a backslash with its html entity, since backslashes are
     * mostly used to escape characters when inserting into a database. With a
     * decent database layer that is not needed, so every backslash is replaced
     * by its html entity equivalent here.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $str = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // For every encoding other than UTF-8 the work is done at this point.
    if ($encoding !== 'UTF-8') {
      return $str;
    }

    // Collect each distinct character whose size (per chr_size_list()) is at
    // least 3 and replace it with the result of html_encode().
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($str) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($str, $position);
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $str);
  }
2472
2473
  /**
2474
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2475
   *
2476
   * INFO: Take a look at "UTF8::htmlentities()"
2477
   *
2478
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2479
   *
2480
   * @param string $str           <p>
2481
   *                              The string being converted.
2482
   *                              </p>
2483
   * @param int    $flags         [optional] <p>
2484
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2485
   *                              invalid code unit sequences and the used document type. The default is
2486
   *                              ENT_COMPAT | ENT_HTML401.
2487
   *                              <table>
2488
   *                              Available <i>flags</i> constants
2489
   *                              <tr valign="top">
2490
   *                              <td>Constant Name</td>
2491
   *                              <td>Description</td>
2492
   *                              </tr>
2493
   *                              <tr valign="top">
2494
   *                              <td><b>ENT_COMPAT</b></td>
2495
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2496
   *                              </tr>
2497
   *                              <tr valign="top">
2498
   *                              <td><b>ENT_QUOTES</b></td>
2499
   *                              <td>Will convert both double and single quotes.</td>
2500
   *                              </tr>
2501
   *                              <tr valign="top">
2502
   *                              <td><b>ENT_NOQUOTES</b></td>
2503
   *                              <td>Will leave both double and single quotes unconverted.</td>
2504
   *                              </tr>
2505
   *                              <tr valign="top">
2506
   *                              <td><b>ENT_IGNORE</b></td>
2507
   *                              <td>
2508
   *                              Silently discard invalid code unit sequences instead of returning
2509
   *                              an empty string. Using this flag is discouraged as it
2510
   *                              may have security implications.
2511
   *                              </td>
2512
   *                              </tr>
2513
   *                              <tr valign="top">
2514
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2515
   *                              <td>
2516
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2517
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2518
   *                              </td>
2519
   *                              </tr>
2520
   *                              <tr valign="top">
2521
   *                              <td><b>ENT_DISALLOWED</b></td>
2522
   *                              <td>
2523
   *                              Replace invalid code points for the given document type with a
2524
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2525
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2526
   *                              instance, to ensure the well-formedness of XML documents with
2527
   *                              embedded external content.
2528
   *                              </td>
2529
   *                              </tr>
2530
   *                              <tr valign="top">
2531
   *                              <td><b>ENT_HTML401</b></td>
2532
   *                              <td>
2533
   *                              Handle code as HTML 4.01.
2534
   *                              </td>
2535
   *                              </tr>
2536
   *                              <tr valign="top">
2537
   *                              <td><b>ENT_XML1</b></td>
2538
   *                              <td>
2539
   *                              Handle code as XML 1.
2540
   *                              </td>
2541
   *                              </tr>
2542
   *                              <tr valign="top">
2543
   *                              <td><b>ENT_XHTML</b></td>
2544
   *                              <td>
2545
   *                              Handle code as XHTML.
2546
   *                              </td>
2547
   *                              </tr>
2548
   *                              <tr valign="top">
2549
   *                              <td><b>ENT_HTML5</b></td>
2550
   *                              <td>
2551
   *                              Handle code as HTML 5.
2552
   *                              </td>
2553
   *                              </tr>
2554
   *                              </table>
2555
   *                              </p>
2556
   * @param string $encoding      [optional] <p>
2557
   *                              Defines encoding used in conversion.
2558
   *                              </p>
2559
   *                              <p>
2560
   *                              For the purposes of this function, the encodings
2561
   *                              ISO-8859-1, ISO-8859-15,
2562
   *                              UTF-8, cp866,
2563
   *                              cp1251, cp1252, and
2564
   *                              KOI8-R are effectively equivalent, provided the
2565
   *                              <i>string</i> itself is valid for the encoding, as
2566
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2567
   *                              the same positions in all of these encodings.
2568
   *                              </p>
2569
   * @param bool   $double_encode [optional] <p>
2570
   *                              When <i>double_encode</i> is turned off PHP will not
2571
   *                              encode existing html entities, the default is to convert everything.
2572
   *                              </p>
2573
   *
2574
   * @return string The converted string.
2575
   * </p>
2576
   * <p>
2577
   * If the input <i>string</i> contains an invalid code unit
2578
   * sequence within the given <i>encoding</i> an empty string
2579
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2580
   * <b>ENT_SUBSTITUTE</b> flags are set.
2581
   */
2582 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // Only the exact spelling "UTF-8" is used as-is; every other value is
    // handed to normalize_encoding() (with "UTF-8" as its second argument).
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    // Delegate the actual escaping to the native function.
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2590
2591
  /**
2592
   * Checks whether iconv is available on the server.
2593
   *
2594
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2595
   */
2596 1
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a boolean.
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class
    //
    // The iconv_*() functions only exist when the extension is loaded, so
    // they must not be called otherwise (that would be a fatal
    // "undefined function" error instead of a clean "false" result).
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2611
2612
  /**
2613
   * alias for "UTF8::decimal_to_chr()"
2614
   *
2615
   * @see UTF8::decimal_to_chr()
2616
   *
2617
   * @param mixed $int
2618
   *
2619
   * @return string
2620
   */
2621 2
  public static function int_to_chr($int)
  {
    // Pure alias: the conversion itself lives in decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2625
2626
  /**
2627
   * Converts Integer to hexadecimal U+xxxx code point representation.
2628
   *
2629
   * INFO: opposite to UTF8::hex_to_int()
2630
   *
2631
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2632
   * @param string $pfix [optional]
2633
   *
2634
   * @return string <p>The code point, or empty string on failure.</p>
2635
   */
2636 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Only genuine integers are accepted; anything else (numeric strings,
    // floats, ...) yields the documented empty-string failure value.
    if ((int)$int !== $int) {
      return '';
    }

    $digits = dechex($int);

    // Left-pad with zeros so that at least four hex digits are returned.
    if (strlen($digits) < 4) {
      $digits = substr('0000' . $digits, -4);
    }

    return $pfix . $digits;
  }
2648
2649
  /**
2650
   * Checks whether intl-char is available on the server.
2651
   *
2652
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2653
   */
2654 1
  public static function intlChar_loaded()
  {
    // The version gate is evaluated first; the class lookup only happens
    // when Bootup reports PHP >= 7.0.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2662
2663
  /**
2664
   * Checks whether intl is available on the server.
2665
   *
2666
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2667
   */
2668 4
  public static function intl_loaded()
  {
    // extension_loaded() already answers with a strict boolean.
    $isLoaded = extension_loaded('intl');

    return $isLoaded;
  }
2672
2673
  /**
2674
   * alias for "UTF8::is_ascii()"
2675
   *
2676
   * @see UTF8::is_ascii()
2677
   *
2678
   * @param string $str
2679
   *
2680
   * @return boolean
2681
   *
2682
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2683
   */
2684
  public static function isAscii($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2688
2689
  /**
2690
   * alias for "UTF8::is_base64()"
2691
   *
2692
   * @see UTF8::is_base64()
2693
   *
2694
   * @param string $str
2695
   *
2696
   * @return bool
2697
   *
2698
   * @deprecated <p>use "UTF8::is_base64()"</p>
2699
   */
2700
  public static function isBase64($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2704
2705
  /**
2706
   * alias for "UTF8::is_binary()"
2707
   *
2708
   * @see UTF8::is_binary()
2709
   *
2710
   * @param string $str
2711
   *
2712
   * @return bool
2713
   *
2714
   * @deprecated <p>use "UTF8::is_binary()"</p>
2715
   */
2716
  public static function isBinary($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2720
2721
  /**
2722
   * alias for "UTF8::is_bom()"
2723
   *
2724
   * @see UTF8::is_bom()
2725
   *
2726
   * @param string $utf8_chr
2727
   *
2728
   * @return boolean
2729
   *
2730
   * @deprecated <p>use "UTF8::is_bom()"</p>
2731
   */
2732
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias; forwards unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2736
2737
  /**
2738
   * alias for "UTF8::is_html()"
2739
   *
2740
   * @see UTF8::is_html()
2741
   *
2742
   * @param string $str
2743
   *
2744
   * @return boolean
2745
   *
2746
   * @deprecated <p>use "UTF8::is_html()"</p>
2747
   */
2748
  public static function isHtml($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2752
2753
  /**
2754
   * alias for "UTF8::is_json()"
2755
   *
2756
   * @see UTF8::is_json()
2757
   *
2758
   * @param string $str
2759
   *
2760
   * @return bool
2761
   *
2762
   * @deprecated <p>use "UTF8::is_json()"</p>
2763
   */
2764
  public static function isJson($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2768
2769
  /**
2770
   * alias for "UTF8::is_utf16()"
2771
   *
2772
   * @see UTF8::is_utf16()
2773
   *
2774
   * @param string $str
2775
   *
2776
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2777
   *
2778
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2779
   */
2780
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2784
2785
  /**
2786
   * alias for "UTF8::is_utf32()"
2787
   *
2788
   * @see UTF8::is_utf32()
2789
   *
2790
   * @param string $str
2791
   *
2792
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2793
   *
2794
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2795
   */
2796
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias; forwards unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2800
2801
  /**
2802
   * alias for "UTF8::is_utf8()"
2803
   *
2804
   * @see UTF8::is_utf8()
2805
   *
2806
   * @param string $str
2807
   * @param bool   $strict
2808
   *
2809
   * @return bool
2810
   *
2811
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2812
   */
2813
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias; both arguments are forwarded unchanged
    // to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2817
2818
  /**
2819
   * Checks if a string is 7 bit ASCII.
2820
   *
2821
   * @param string $str <p>The string to check.</p>
2822
   *
2823
   * @return bool <p>
2824
   *              <strong>true</strong> if it is ASCII<br>
2825
   *              <strong>false</strong> otherwise
2826
   *              </p>
2827
   */
2828 53
  public static function is_ascii($str)
  {
    $text = (string)$str;

    // An empty string contains no offending byte at all.
    if ($text === '') {
      return true;
    }

    // The string counts as ASCII when no byte outside the allowed set is
    // found: printable 0x20-0x7E plus 0x09, 0x0A, 0x0D, 0x10 and 0x13.
    // NOTE(review): "\x10" and "\x13" look like decimal 10 / 13 (LF / CR)
    // written as hex - confirm whether DLE / DC3 are really meant to pass.
    $offendingByteFound = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $text);

    return $offendingByteFound !== 1;
  }
2838
2839
  /**
2840
   * Returns true if the string is base64 encoded, false otherwise.
2841
   *
2842
   * @param string $str <p>The input string.</p>
2843
   *
2844
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2845
   */
2846 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never treated as base64.
    if (!isset($str[0])) {
      return false;
    }

    // Strict mode: base64_decode() returns false as soon as the input
    // contains a character outside the base64 alphabet.
    $decoded = base64_decode($str, true);

    // Compare against false explicitly instead of relying on truthiness:
    // a payload that decodes to "0" (e.g. "MA==") is falsy in PHP but is
    // perfectly valid base64.
    if ($decoded === false) {
      return false;
    }

    // Round-trip check: re-encoding must reproduce the input exactly.
    return base64_encode($decoded) === $str;
  }
2861
2862
  /**
2863
   * Check if the input is binary... (this looks like a hack).
2864
   *
2865
   * @param mixed $input
2866
   *
2867
   * @return bool
2868
   */
2869 16
  public static function is_binary($input)
  {
    $input = (string)$input;

    // Nothing to inspect.
    if (!isset($input[0])) {
      return false;
    }

    // A string made up solely of the digits "0" and "1" is treated as a
    // binary (bit-string) representation.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30% NUL
    // bytes" ratio test was dropped: whenever it matched, this check matched
    // as well, so the result is unchanged.
    return substr_count($input, "\x00") > 0;
  }
2892
2893
  /**
2894
   * Check if the file is binary.
2895
   *
2896
   * @param string $file
2897
   *
2898
   * @return boolean
2899
   */
2900
  public static function is_binary_file($file)
  {
    // Fallback content: an unreadable file is reported as "not binary",
    // because is_binary('') returns false.
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false (plus a warning), not by
      // throwing - so the result has to be checked before it is used.
      $fp = fopen($file, 'rb');

      if ($fp !== false) {
        // Only the first 512 bytes are inspected; a failed read (false)
        // collapses to the empty string.
        $block = (string)fread($fp, 512);
      }
    } catch (\Exception $e) {
      // Reached only when an error handler turns warnings into exceptions.
      $block = '';
    }

    // Close the handle on every path, including a read that threw.
    if ($fp !== false) {
      fclose($fp);
    }

    return self::is_binary($block);
  }
2912
2913
  /**
2914
   * Checks if the given string is equal to any "Byte Order Mark".
2915
   *
2916
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2917
   *
2918
   * @param string $str <p>The input string.</p>
2919
   *
2920
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2921
   */
2922 1
  public static function is_bom($str)
  {
    $isKnownBom = false;

    // The BOM byte sequences are the *keys* of self::$BOM; the values are
    // not needed for this check.
    foreach (self::$BOM as $candidate => $unusedLength) {
      // Strict comparison: the whole input must be exactly one BOM.
      if ($candidate === $str) {
        $isKnownBom = true;
        break;
      }
    }

    return $isKnownBom;
  }
2932
2933
  /**
2934
   * Check if the string contains any html-tags <lall>.
2935
   *
2936
   * @param string $str <p>The input string.</p>
2937
   *
2938
   * @return boolean
2939
   */
2940 1
  public static function is_html($str)
  {
    $markup = (string)$str;

    // An empty string cannot contain a tag.
    if ($markup === '') {
      return false;
    }

    // Looks for one opening, closing or self-closing tag, optionally with
    // attributes (quoted or unquoted values).
    $found = preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $markup);

    // preg_match() yields 1 on a hit, 0 on a miss and false on error;
    // only a real hit counts as HTML.
    return $found === 1;
  }
2959
2960
  /**
2961
   * Try to check if "$str" is a JSON string.
2962
   *
2963
   * @param string $str <p>The input string.</p>
2964
   *
2965
   * @return bool
2966
   */
2967 1
  public static function is_json($str)
  {
    $candidate = (string)$str;

    // The empty string is not considered JSON.
    if ($candidate === '') {
      return false;
    }

    $decoded = self::json_decode($candidate);

    // Only structured results (object or array) qualify; scalars and null
    // are rejected before the error state is even consulted.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // The decoder must not have recorded an error either.
    return json_last_error() === JSON_ERROR_NONE;
  }
2991
2992
  /**
2993
   * Check if the string is UTF-16.
2994
   *
2995
   * @param string $str <p>The input string.</p>
2996
   *
2997
   * @return int|false <p>
2998
   *                   <strong>false</strong> if it's not UTF-16,<br>
2999
   *                   <strong>1</strong> for UTF-16LE,<br>
3000
   *                   <strong>2</strong> for UTF-16BE.
3001
   *                   </p>
3002
   */
3003 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() classifies as binary is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One hit counter per byte order, filled by the same procedure.
    $hits = array('UTF-16LE' => 0, 'UTF-16BE' => 0);

    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 must give the same text.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Count the keys of count_chars($roundTrip) that occur (strictly)
      // among the values of count_chars($str).
      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
        if (in_array($char, $sourceChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    // A tie (including 0 : 0) means "not UTF-16".
    if ($hits['UTF-16LE'] > $hits['UTF-16BE']) {
      return 1;
    }

    if ($hits['UTF-16BE'] > $hits['UTF-16LE']) {
      return 2;
    }

    return false;
  }
3051
3052
  /**
3053
   * Check if the string is UTF-32.
3054
   *
3055
   * @param string $str
3056
   *
3057
   * @return int|false <p>
3058
   *                   <strong>false</strong> if it's not UTF-32,<br>
3059
   *                   <strong>1</strong> for UTF-32LE,<br>
3060
   *                   <strong>2</strong> for UTF-32BE.
3061
   *                   </p>
3062
   */
3063 3 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() classifies as binary is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One hit counter per byte order, filled by the same procedure.
    $hits = array('UTF-32LE' => 0, 'UTF-32BE' => 0);

    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 must give the same text.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Count the keys of count_chars($roundTrip) that occur (strictly)
      // among the values of count_chars($str).
      $sourceChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unusedCount) {
        if (in_array($char, $sourceChars, true) === true) {
          $hits[$candidate]++;
        }
      }
    }

    // A tie (including 0 : 0) means "not UTF-32".
    if ($hits['UTF-32LE'] > $hits['UTF-32BE']) {
      return 1;
    }

    if ($hits['UTF-32BE'] > $hits['UTF-32LE']) {
      return 2;
    }

    return false;
  }
3111
3112
  /**
3113
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3114
   *
3115
   * @see    http://hsivonen.iki.fi/php-utf8/
3116
   *
3117
   * @param string $str    <p>The string to be checked.</p>
3118
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3119
   *
3120
   * @return bool
3121
   */
3122 60
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is trivially valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: input detected as UTF-16 or UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut relies on the "/u" modifier but is taken
    // when pcre_utf8_support() does NOT report true - the condition looks
    // inverted; confirm against pcre_utf8_support() before changing it.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte length is needed here, so use the "8BIT" variant when the
    // support table reports mbstring function overloading.
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $tmp = $in;
          $tmp = ($tmp & 0x0000003F) << $shift;
          $mUcs4 |= $tmp;
          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state here means the input ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return 0 === $mState;
  }
3263
3264
  /**
3265
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3266
   * Decodes a JSON string
3267
   *
3268
   * @link http://php.net/manual/en/function.json-decode.php
3269
   *
3270
   * @param string $json    <p>
3271
   *                        The <i>json</i> string being decoded.
3272
   *                        </p>
3273
   *                        <p>
3274
   *                        This function only works with UTF-8 encoded strings.
3275
   *                        </p>
3276
   *                        <p>PHP implements a superset of
3277
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3278
   *                        only supports these values when they are nested inside an array or an object.
3279
   *                        </p>
3280
   * @param bool   $assoc   [optional] <p>
3281
   *                        When <b>TRUE</b>, returned objects will be converted into
3282
   *                        associative arrays.
3283
   *                        </p>
3284
   * @param int    $depth   [optional] <p>
3285
   *                        User specified recursion depth.
3286
   *                        </p>
3287
   * @param int    $options [optional] <p>
3288
   *                        Bitmask of JSON decode options. Currently only
3289
   *                        <b>JSON_BIGINT_AS_STRING</b>
3290
   *                        is supported (default is to cast large integers as floats)
3291
   *                        </p>
3292
   *
3293
   * @return mixed the value encoded in <i>json</i> in appropriate
3294
   * PHP type. Values true, false and
3295
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3296
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3297
   * <i>json</i> cannot be decoded or if the encoded
3298
   * data is deeper than the recursion limit.
3299
   */
3300 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // Run the input through filter() and force it to a string first.
    $payload = (string)self::filter($json);

    // Below PHP 5.4 the native function is called without the "$options"
    // argument.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($payload, $assoc, $depth);
    }

    return json_decode($payload, $assoc, $depth, $options);
  }
3312
3313
  /**
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" before it is handed to the native "json_encode()".
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>
   *                       The <i>value</i> being encoded. Can be any type except a resource.
   *                       All string data must be UTF-8 encoded.
   *                       </p>
   * @param int   $options [optional] <p>
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>, <b>JSON_HEX_TAG</b>, <b>JSON_HEX_AMP</b>,
   *                       <b>JSON_HEX_APOS</b>, <b>JSON_NUMERIC_CHECK</b>, <b>JSON_PRETTY_PRINT</b>,
   *                       <b>JSON_UNESCAPED_SLASHES</b>, <b>JSON_FORCE_OBJECT</b>, <b>JSON_UNESCAPED_UNICODE</b>.
   *                       </p>
   * @param int   $depth   [optional] <p>
   *                       Set the maximum depth. Must be greater than zero.
   *                       Only forwarded when "Bootup::is_php('5.5')" reports true.
   *                       </p>
   *
   * @return string <p>A JSON encoded string on success or <b>FALSE</b> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // the "$depth" argument is only handed to the native function on PHP >= 5.5
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3361
3362
  /**
   * Lowercases the first character of a string and leaves the rest untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character; "false" from substr() means "nothing left"
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    if ($tail === false) {
      $tail = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3386
3387
  /**
   * Alias for "UTF8::lcfirst()": lowercases the first character of the given word.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word      <p>The input word.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowercased = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowercased;
  }
3402
3403
  /**
   * Lowercases the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every non-empty chunk that is not listed in
   * $exceptions is passed through "UTF8::lcfirst()", and the chunks are glued together again.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // compare against the empty string: a loose "!$str" would also throw away the valid input "0"
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $useExceptions = !empty($exceptions);
    $newWords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty chunks only, a chunk like "0" has to survive
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3452
3453
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without a char list, every leading character of the Unicode categories "Z" (separators)
   * and "C" (control / other) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; "0" is a valid char list.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Explicit "not given" checks: a loose "!$chars" would also treat the char list "0"
    // as missing and strip whitespace instead of zeros.
    $noCharList = $chars === INF
                  || $chars === null
                  || $chars === false
                  || $chars === ''
                  || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharList === true) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3476
3477
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is joined first).</p>
   *
   * @return string <p>The character with the highest code point.</p>
   */
  public static function max($arg)
  {
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;
    $codePoints = self::codepoints($haystack);

    return self::chr(max($codePoints));
  }
3492
3493
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Largest entry of "UTF8::chr_size_list()", or 0 if that list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $widths = self::chr_size_list($str);

    return count($widths) > 0 ? (int)max($widths) : 0;
  }
3510
3511
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: when the extension is loaded, the internal mbstring encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3526
3527
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is joined first).</p>
   *
   * @return string <p>The character with the lowest code point.</p>
   */
  public static function min($arg)
  {
    $haystack = is_array($arg) === true ? implode('', $arg) : $arg;
    $codePoints = self::codepoints($haystack);

    return self::chr(min($codePoints));
  }
3542
3543
  /**
   * Alias for "UTF8::normalize_encoding()".
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding name to normalize.</p>
   * @param mixed  $fallback <p>Returned as-is when $encoding is empty.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3559
3560
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input returns $fallback, "UTF-8" and every entry of self::$ICONV_ENCODING
   * are returned unchanged, then the per-request cache is consulted, and finally the upper-cased
   * name (reduced to letters and digits) is looked up in the alias table below.
   * Unknown names are returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    // alias table, declared "static" so it is built only once and not on every cache miss
    static $EQUIVALENCES = array(
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!$encoding) {
      return $fallback;
    }

    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // Reduce the name to letters and digits only. Whitespace is stripped as well,
    // otherwise inputs like "UTF 8" or " latin1 " would never match a table key.
    $encodingUpperHelper = (string)preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    if (isset($EQUIVALENCES[$encodingUpperHelper])) {
      $encoding = $EQUIVALENCES[$encodingUpperHelper];
    }

    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3666
3667
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists, split from the mapping table once per request
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if (null === $UTF8_MSWORD_KEYS_CACHE) {
      $UTF8_MSWORD_KEYS_CACHE = array_keys(self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = array_values(self::$UTF8_MSWORD);
    }

    $normalized = str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $str);

    return $normalized;
  }
3692
3693
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE is replaced by a plain space; unless requested
   * otherwise, every entry of self::$BIDI_UNI_CODE_CONTROLS_TABLE is removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // Only a strict "true" keeps the NBSP. The cache slot is derived from that very same decision:
    // keying on "(int)$keepNonBreakingSpace" let a call with e.g. "1" fill slot 1 with a list that
    // still contains the NBSP, which then broke every later call with a real "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $WHITESPACE_CACHE[$cacheKey] = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($WHITESPACE_CACHE[$cacheKey]['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($WHITESPACE_CACHE[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3738
3739
  /**
   * Strip all whitespace characters. This includes tabs and newline
   * characters, as well as multibyte whitespace such as the thin space
   * and ideographic space.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without any "[[:space:]]" match.</p>
   */
  public static function strip_whitespace($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $str);

    // preg_replace() yields null on failure, the cast turns that into an empty string
    return (string)$stripped;
  }
3758
3759
  /**
   * Format a number with grouped thousands.
   *
   * The native "number_format()" only honours the first byte of each separator before PHP 5.4,
   * which cuts multi-byte (UTF-8) separators in half. On such versions the number is formatted
   * with the ASCII separators first and the real separators are swapped in afterwards.
   *
   * @param float  $number        <p>The number being formatted.</p>
   * @param int    $decimals      [optional] <p>Number of decimal points.</p>
   * @param string $dec_point     [optional] <p>Separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>Thousands separator.</p>
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // the workaround is needed as soon as ONE of the separators is longer than a single byte ...
    $hasLongSeparator = isset($thousands_sep[1]) || isset($dec_point[1]);

    // ... and only on PHP versions that do not support such separators natively
    if ($hasLongSeparator === true && Bootup::is_php('5.4') !== true) {
      // strtr() swaps both separators in one pass, so a "," inside $dec_point
      // is not replaced a second time by $thousands_sep
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3797
3798
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are memoized per request, keyed by the untouched input plus the normalized encoding.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 if no byte could be read from the input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    static $CHAR_CACHE = array();

    $encoding = (string)$encoding;

    // keep the untouched input, it is part of the cache key
    $input = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // convert only if the normalized name is still something else than UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $input . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        $CHAR_CACHE[$cacheKey] = $intlCode;

        return $intlCode;
      }
    }

    // manual decoding: look at up to four raw bytes (unpack() indexes them from 1)
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte, or a lead byte without enough following bytes
      $code = $lead;
    }

    $CHAR_CACHE[$cacheKey] = $code;

    return $code;
  }
3864
3865
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never creates variables in the current scope,
   *          the result is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or the $result is empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3893
3894
  /**
   * Checks if the "/u" modifier is available, which enables Unicode support in PCRE.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier only matches if PCRE accepts that modifier
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3904
3905
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters from start to end, or an empty array if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $start = self::rangeBoundaryToCodePoint($var1);
    if (!$start) {
      return array();
    }

    $end = self::rangeBoundaryToCodePoint($var2);
    if (!$end) {
      return array();
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($start, $end)
    );
  }

  /**
   * Resolves one boundary of "UTF8::range()" to a code point.
   *
   * @param mixed $boundary <p>Decimal digits, hexadecimal digits or a UTF-8 character.</p>
   *
   * @return int <p>The code point; decimal is tried first, then hexadecimal, then "UTF8::ord()".</p>
   */
  private static function rangeBoundaryToCodePoint($boundary)
  {
    // the ctype functions are meant for strings, so test the string form of the boundary
    $asString = (string)$boundary;

    if (ctype_digit($asString)) {
      return (int)$boundary;
    }

    if (ctype_xdigit($asString)) {
      return (int)self::hex_to_int($boundary);
    }

    return self::ord($boundary);
  }
3951
3952
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html entities, the loop below decodes them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // one pass = to_utf8 -> html_entity_decode -> rawurldecode -> fix_simple_utf8;
    // with $multi_decode the pass is repeated until the string does not change anymore
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
4002
4003
  /**
   * Alias for "UTF8::remove_bom()".
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4018
4019
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM bytes => byte length) is checked in turn against the start
   * of the string and cut off on a match.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // byte-wise comparison: only a BOM at offset 0 counts
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
4046
4047
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated $what (e.g. "   " for " ") is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {

        // an empty needle has nothing to collapse
        if ((string)$item === '') {
          continue;
        }

        // Replace with the captured group instead of the raw needle: used as a replacement string,
        // a needle like "$0" or "\\" would be interpreted as a back-reference / escape sequence.
        // Group 1 always holds exactly one (the last) occurrence of the needle.
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4070
4071
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Removed are all control characters except newline (dec 10), carriage return (dec 13) and
   * horizontal tab (dec 09), plus DEL (dec 127). With $url_encoded, the url encoded forms
   * ("%00" ... "%1f", "%7f", in any letter case) are removed as well.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove url encoded control characters.</p>
   * @param string $replacement [optional] <p>The replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing is found anymore: a removal can join the
    // surrounding characters into a new match (e.g. "%0%0bb")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4104
4105
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // an empty replacement is handed to mbstring as its "none" substitute mode
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the substitute character is a global mbstring setting: remember it,
      // run the UTF-8 -> UTF-8 conversion, then restore it
      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    $search = array(
        "\xEF\xBF\xBD",
        '�',
    );
    $replace = array(
        $replacementChar,
        $replacementChar,
    );

    return str_replace($search, $replace, $str);
  }
4150
4151
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * Without a char list, every trailing character of the Unicode categories "Z" (separators)
   * and "C" (control / other) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; "0" is a valid char list.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Explicit "not given" checks: a loose "!$chars" would also treat the char list "0"
    // as missing and strip whitespace instead of zeros.
    $noCharList = $chars === INF
                  || $chars === null
                  || $chars === false
                  || $chars === ''
                  || $chars === array();

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($noCharList === true) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
4174
4175
  /**
   * Builds a regex fragment that matches any of the characters of $s.
   *
   * Chunks from "UTF8::str_split()" shorter than three bytes are quoted and collected in one
   * "[...]" class, a "-" is moved to the front of that class, longer chunks that count as one
   * character are added unquoted, and anything else becomes its own "|" alternative.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>Either "[...]" or "(?:...|...)", without delimiters.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;
    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // slot 0 collects the character class, further slots are stand-alone alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($char[2])) {
        $parts[0] .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        $parts[0] .= $char;
        continue;
      }

      $parts[] = $char;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $pattern;

    return $pattern;
  }
4223
4224
  /**
   * WARNING: Prints the detected native UTF-8 support (libs), e.g. for debugging.
   *
   * Every entry of the support table is echoed, followed by a line break and "<br>".
   */
  public static function showSupport()
  {
    // make sure the support table has been filled
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $supportValue) {
      echo $supportValue, "\n<br>";
    }
  }
4237
4238
  /**
   * Converts a UTF-8 character to an HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if (!isset($char[0])) {
      return '';
    }

    // ASCII input is passed through untouched when requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $targetEncoding = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    return '&#' . self::ord($char, $targetEncoding) . ';';
  }
4269
4270
  /**
   * Convert a string to an array of Unicode characters.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    $ret = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // PCRE with UTF-8 support: one match per character
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $ret = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the string byte by byte

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $len = (self::$SUPPORT['mbstring_func_overload'] === true)
          ? \mb_strlen($str, '8BIT')
          : strlen($str);

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {

        $lead = $str[$i];

        // single byte (high bit not set)
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;

          continue;
        }

        // lead byte of a two-byte sequence
        if (isset($str[$i + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $lead . $str[$i + 1];

            $i++;
          }

          continue;
        }

        // lead byte of a three-byte sequence
        if (isset($str[$i + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $lead . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

          continue;
        }

        // lead byte of a four-byte sequence
        if (isset($str[$i + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $lead . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }
        }
      }
    }

    // join the single characters into chunks of "$length" characters
    if ($length > 1) {
      $chunks = array_chunk($ret, $length);

      return array_map(
          function ($chunk) {
            return implode('', $chunk);
          },
          $chunks
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4393
4394
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed detection list and finally a
   * round-trip through "iconv()" for every known iconv encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // each detector is called only once and its result is reused
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the string from that encoding
    // to itself leaves the content (compared via md5) unchanged.

    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4494
4495
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $needle === $tail;
  }
4523
4524
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many characters from the end as the needle is long
    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // substr() can fail with false, which must not be handed to strcasecmp()
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4547
4548
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s); always inserted literally.
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // turn every search term into a case-insensitive UTF-8 pattern
    $patterns = array();
    foreach ((array)$search as $key => $needle) {
      $needle = (string)$needle;

      if ('' === $needle) {
        // a pattern that can never match: an empty needle replaces nothing
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($needle, '/') . '/ui';
      }
    }

    // preg_replace() would interpret "$1" or "\1" inside the replacement as
    // back-references, so backslashes and dollar signs are escaped first.
    $escapeMap = array('\\' => '\\\\', '$' => '\\$');
    if (is_array($replace)) {
      foreach ($replace as $key => $replacement) {
        $replace[$key] = strtr((string)$replacement, $escapeMap);
      }
    } else {
      $replace = strtr((string)$replace, $escapeMap);
    }

    $replacements = 0;
    $subject = preg_replace($patterns, $replace, $subject, -1, $replacements);
    $count = $replacements;

    return $subject;
  }
4591
4592
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::stripos($haystack, $needle) === 0;
  }
4615
4616
  /**
   * Limit the number of characters in a string, cutting at a word boundary where possible.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>The maximum number of characters to keep.</p>
   * @param string $strAddOn <p>The string appended to a shortened result.</p>
   *
   * @return string <p>The input itself if it is short enough, otherwise the shortened string plus "$strAddOn".</p>
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    if (self::strlen($str) <= $length) {
      return $str;
    }

    $lastIndex = $length - 1;

    // the limit falls directly behind a word: cut in front of the space
    if (self::substr($str, $lastIndex, 1) === ' ') {
      return (string)self::substr($str, 0, $lastIndex) . $strAddOn;
    }

    // drop the (possibly cut) last word of the truncated string
    $truncated = (string)self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      // nothing is left without the last word: fall back to a hard cut
      return (string)self::substr($truncated, 0, $lastIndex) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4656
4657
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged if "$pad_length" is not a positive integer,
   * if it is smaller than the length of the input or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string can not fill anything and would cause a division by zero below
    if (!$ps_length) {
      return $str;
    }

    // number of characters that have to be added
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the remainder
        $leftLength = (int)floor($diff / 2);
        $rightLength = $diff - $leftLength;

        $pre = str_repeat($pad_string, (int)ceil($leftLength / $ps_length));
        $pre = (string)self::substr($pre, 0, $leftLength);
        $post = str_repeat($pad_string, (int)ceil($rightLength / $ps_length));
        $post = (string)self::substr($post, 0, $rightLength);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4711
4712
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4736
4737
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4770
4771
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement for the first occurrence.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject, unchanged if the search-term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstPos = self::strpos($subject, $search);

    if ($firstPos === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstPos, self::strlen($search));
  }
4790
4791
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // shuffle whole characters, not single bytes
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4806
4807
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice drops duplicated values
      $codePoints = array_flip(array_flip($codePoints));
    }

    $desc ? arsort($codePoints) : asort($codePoints);

    return self::string($codePoints);
  }
4832
4833
  /**
   * Split a string into an array.
   *
   * The string is split with the help of GRAPHEME_CLUSTER_RX; a "$len"
   * smaller than 1 is handed over to the native "str_split()".
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of matched elements per array entry.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // concatenate every "$len" consecutive elements into one entry
    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len === 0) {
        $chunks[++$chunkIndex] = $cluster;
      } else {
        $chunks[$chunkIndex] .= $cluster;
      }
    }

    return $chunks;
  }
4877
4878
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // the needle has to be found right at the beginning
    return self::strpos($haystack, $needle) === 0;
  }
4901
4902
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number and written in base 2,
   * without leading zeros ("0" for an empty or all-zero input).
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    $value = unpack('H*', $str);
    $hex = (string)$value[1];

    if ($hex === '') {
      return '0';
    }

    // Convert digit by digit: base_convert() on the whole hex string loses
    // precision as soon as the number gets too large.
    $bits = '';
    $hexLength = \strlen($hex);
    for ($i = 0; $i < $hexLength; $i++) {
      $bits .= \str_pad(decbin(hexdec($hex[$i])), 4, '0', STR_PAD_LEFT);
    }

    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4917
4918
  /**
   * Convert a string into an array of words.
   *
   * The string is split with PREG_SPLIT_DELIM_CAPTURE, so the result contains
   * the words as well as the text between them.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>Remove values that are not longer than this number of characters.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if (!isset($str[0])) {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // nothing to filter
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $removeShortValues !== null
                    &&
                    self::strlen($part) <= $removeShortValues;
      if ($isTooShort) {
        continue;
      }

      $isEmpty = $removeEmptyValues === true
                 &&
                 trim($part) === '';
      if ($isEmpty) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4979
4980
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // all arguments are handed over unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4995
4996
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see "$format").</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are found at the odd indexes, the text between them at the even ones
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    if ($format === 1) {
      $words = array();
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      $offset = self::strlen($parts[0]);
      for ($i = 1; $i < $total; $i += 2) {
        $words[$offset] = $parts[$i];
        // skip the word itself and the text that follows it
        $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($total - 1) / 2;
  }
5039
5040
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    // compare the case-folded variants of both strings
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5058
5059
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all arguments are handed over unchanged
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5076
5077
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are not identical are compared in their NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings need no normalization
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5097
5098
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   [optional] <p>Start position inside the string.</p>
   * @param int    $length   [optional] <p>Length of the part of the string that is examined.</p>
   *
   * @return int|null <p><strong>null</strong> for an empty "$charList" or an empty string to examine.</p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList .= '';
    if ('' === $charList) {
      return null;
    }

    // narrow the string down to the requested part
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return null;
    }

    // capture everything in front of the first character from the mask
    $found = preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches);
    if ($found) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    return self::strlen($str);
  }
5134
5135
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all arguments are handed over unchanged
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5152
5153
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    // convert every code point via UTF8::chr() and glue the results together
    $chars = array();
    foreach ($array as $codePoint) {
      $chars[] = self::chr($codePoint);
    }

    return implode('', $chars);
  }
5175
5176
  /**
   * Checks whether the string begins with one of the known "BOM" (Byte Order Mark) sequences.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // the BOM byte sequences are the keys of the lookup table, the values are not needed here
    foreach (array_keys(self::$BOM) as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5193
5194
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    // nothing to strip
    if ($str === '') {
      return '';
    }

    $input = $cleanUtf8 === true ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
5228
5229
  /**
   * Find the position of the first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (or if haystack / needle is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: a "bool" encoding is only accepted as fallback for old versions and means "UTF-8"
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5289
5290
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if the cleaning removed everything.
    // Compare against '' explicitly: a truthiness check would also match the valid needle "0".
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII input can be handled by the native byte-based function
    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first (case insensitive) match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // the character length of the prefix is the start position of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
5371
5372
  /**
   * Get the string length in characters, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: a "bool" encoding is only accepted as fallback for old versions and means "UTF-8"
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte-oriented encodings: the length is the number of bytes.
    // (loose "==" on purpose, it mirrors the comparison semantics of a "switch" statement)
    if ($encoding == 'ASCII' || $encoding == 'CP850' || $encoding == '8BIT') {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return strlen($str);
      }

      return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    $isUtf8 = $encoding === 'UTF-8';

    if (
        !$isUtf8
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred: mbstring, then iconv
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    if (
        $isUtf8
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5479
5480
  /**
   * Case insensitive string comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    // case-fold both sides first, then delegate to the case sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5496
5497
  /**
   * String comparison using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // inputs that are identical after string conversion are equal: skip the folding
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5515
5516
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    // case-fold both sides first, then delegate to the case sensitive variant
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5533
5534
  /**
   * String comparison of the first n characters.
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // cut both strings down to their first $len characters and compare the heads
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
5554
5555
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (or if haystack / char_list is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $found)) {
      return false;
    }

    // the first regex match is also the first occurrence of that character in the haystack
    return strstr($haystack, $found[0]);
  }
5580
5581
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle:
    // convert a code point into its character BEFORE the string cast (same order as in "UTF8::strrpos()"),
    // otherwise this check can never be true
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither mbstring nor iconv is available (same condition as in "UTF8::strlen()")
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // translate an offset counted from the end into an absolute character position,
      // so that the returned position is relative to the start of the full haystack
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position of the needle within the cut haystack
    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // character position = skipped characters + characters in front of the match
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5726
5727
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack
   *                              this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use, "UTF-8" by default.</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    $encoding = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5766
5767
  /**
   * Reverses the order of the characters in the string.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split into characters via UTF8::split(), flip the list and glue it back together
    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5784
5785
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack
   *                               this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use, "UTF-8" by default.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // only non-default encoding names need to be normalized
    $encoding = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5823
5824
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is a code point: convert it into its character first
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a "bool" encoding is only accepted as fallback for old versions and means "UTF-8"
    if ($encoding === 'UTF-8' || is_bool($encoding)) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $isUtf8 = $encoding === 'UTF-8';

    if (!$isUtf8 && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    if (
        $isUtf8
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides and search case sensitive
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5904
5905
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked for the last occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>A non-negative int is treated as a code
   *                              point and converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
   *                   is not found (or haystack / needle is empty), it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is a code point, not a literal string
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos are not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $isUtf8Alias = ($encoding === 'UTF-8' || $encoding === true || $encoding === false);
    $encoding = $isUtf8Alias ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: cut the haystack according to the offset,
    // search byte-wise and convert the byte position back into a character position

    if ($offset !== 0) {
      if ($offset > 0) {
        $slice = self::substr($haystack, $offset);
      } else {
        $slice = self::substr($haystack, 0, $offset);
        $offset = 0;
      }

      $haystack = ($slice === false) ? '' : (string)$slice;
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    $charsBeforeMatch = self::strlen(substr($haystack, 0, $bytePos));

    return $offset + $charsBeforeMatch;
  }
6007
6008
  /**
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
   * mask.
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional] <p>Start of the examined part, passed on to UTF8::substr().</p>
   * @param int    $length [optional] <p>Length of the examined part, passed on to UTF8::substr().</p>
   *
   * @return int <p>Number of leading characters matching the mask, 0 if none (or if an input is empty).</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // narrow the input down to the requested window first
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // match the longest run of mask characters anchored at the start
    $matches = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $matches)) {
      return 0;
    }

    return self::strlen($matches[0]);
  }
6036
6037
  /**
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, strstr() returns the part of the
   *                               haystack before the first occurrence of the needle (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (or haystack / needle is empty).
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the reported position,
      // so both inputs are cleaned before searching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];
    if ($before_needle) {
      return $prefix;
    }

    return self::substr($haystack, self::strlen($prefix));
  }
6109
6110
  /**
   * Unicode transformation for case-less matching.
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string  $str       <p>The input string.</p>
   * @param bool    $full      [optional] <p>
   *                           <b>true</b>, replace full case folding chars (default)<br>
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                           </p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The folded and lowercased string, or an empty string for empty input.</p>
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // per-request caches for the search / replace tables
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    // step 1: the small built-in folding table is always applied
    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // step 2: optionally apply the full folding table loaded via getData()
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6161
6162
  /**
   * Make a string lowercase.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is handed to mbstring / intl
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // common case: no language-specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $intlUsable = self::$SUPPORT['intl'] === true && Bootup::is_php('5.4') === true;
    if ($intlUsable === false) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    // language-aware lowercasing via an intl transliterator, e.g. "tr-Lower"
    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language: fall back to the generic transliterator
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6219
6220
  /**
   * Generic case sensitive transformation for collation matching.
   *
   * The string is normalized to NFD and every run of non-spacing marks (\p{Mn}) is removed afterwards.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6232
6233
  /**
   * Make a string uppercase.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before the string is handed to mbstring / intl
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // language-aware uppercasing via an intl transliterator, e.g. "tr-Upper"
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // unknown language: fall back to the generic transliterator
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning has to name this method, not strtolower()
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6289
6290
  /**
   * Translate characters or replace sub-strings.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The characters (or an array of strings) to translate from.<br>
   *                              If <i>to</i> is omitted, an array is used as "search => replace" map and
   *                              a string is removed from <i>str</i>.</p>
   * @param string|string[] $to   [optional] <p>The characters (or an array of strings) to translate to.
   *                              If <i>from</i> and <i>to</i> differ in length, the longer one is truncated.</p>
   *
   * @return string <p>
   *                This function returns a copy of str, translating all occurrences of each character in from to the
   *                corresponding character in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // translating something into itself is a no-op
    if ($from === $to) {
      return $str;
    }

    // INF is the "argument not given" marker
    if (INF !== $to) {
      // only strings are split into single characters; arrays are already lists of tokens
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // array_combine() needs both lists to have the same size
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    // only reachable without $to: a plain string is removed from $str
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6337
6338
  /**
   * Return the width of a string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The width as reported by mb_strwidth().</p>
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($str, $charset);
  }
6362
6363
  /**
   * Changes the case of all keys in an array.
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                  or <strong>CASE_LOWER</strong> (default); any other value is handled like
   *                  <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // everything that is not exactly CASE_LOWER upper-cases the keys
    $lowerKeys = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $key => $value) {
      $convertedKey = $lowerKeys ? self::strtolower($key) : self::strtoupper($key);

      // keys that collapse to the same spelling overwrite each other, the last one wins
      $result[$convertedKey] = $value;
    }

    return $result;
  }
6400
6401
  /**
   * Get part of a string.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string  $str       <p>The string being checked.</p>
   * @param int     $offset    <p>The first position used in str.</p>
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>An empty string is returned for an empty <i>str</i> or a
   *                      <i>length</i> of 0. If <i>offset</i> is greater than the length of <i>str</i>,
   *                      <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // the character count is only needed for the range check and as default length
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = (int)self::strlen($str, $encoding);
    }

    // Impossible
    if ($offset && $offset > $str_length) {
      return false;
    }

    // from here on $length is always an integer
    if ($length === null) {
      $length = $str_length;
    } else {
      $length = (int)$length;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 input is cut byte-wise with the native function, unless mbstring overloads it
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_substr($str, $offset, $length);
    }

    // pure ASCII can be cut byte-wise
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make string again
    return implode('', array_slice($array, $offset, $length));
  }
6523
6524
  /**
   * Binary safe comparison of two strings from an offset, up to length characters.
   *
   * @param string  $str1               <p>The main string being compared.</p>
   * @param string  $str2               <p>The secondary string being compared.</p>
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                    counting from the end of the string.</p>
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
   *                                    the length of the str compared to the length of main_str less the offset.</p>
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                    insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $compareWholeStrings = ($offset === 0 && $length === null);

    if ($compareWholeStrings === false) {
      // cut the requested window out of the main string ...
      $window = self::substr($str1, $offset, $length);
      $str1 = ($window === false) ? '' : (string)$window;

      // ... and limit the second string to the size of that window
      $head = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($head === false) ? '' : (string)$head;
    }

    if ($case_insensitivity === true) {
      return self::strcasecmp($str1, $str2);
    }

    return self::strcmp($str1, $str2);
  }
6568
6569
  /**
   * Count the number of substring occurrences.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string  $haystack  <p>The string to search in.</p>
   * @param string  $needle    <p>The substring to search for.</p>
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
   * @param int     $length    [optional] <p>
   *                           The maximum length after the specified offset to search for the
   *                           substring. It outputs a warning if the offset plus the length is
   *                           greater than the haystack length.
   *                           </p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      if ($length === null) {
        // measure with the same charset that self::substr() uses for slicing below
        $length = (int)self::strlen($haystack, $encoding);
      }

      $offset = (int)$offset;
      $length = (int)$length;

      if (
          (
              $length !== 0
              &&
              $offset !== 0
          )
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // only the requested window of the haystack is searched
      $haystackTmp = self::substr($haystack, $offset, $length, $encoding);
      if ($haystackTmp === false) {
        $haystackTmp = '';
      }
      $haystack = (string)$haystackTmp;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // invalid characters would distort the match, so both inputs are cleaned before counting
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: every match of the quoted needle is one occurrence
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6658
6659
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with it.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything behind the prefix
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6691
6692
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with it.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6724
6725
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to shorten.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix; the unchanged haystack if it does not start with the
   *                needle (or the needle is empty); an empty string if the haystack is empty.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty prefix, or the haystack does not start with it
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $remainder = self::substr($haystack, self::strlen($needle));

    // a false result from "self::substr()" is mapped to an empty string
    return $remainder === false ? '' : (string)$remainder;
  }
6757
6758
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given and $str is a string, the replacing
   *                                          ends at the end of the string. If length is zero, the replacement is
   *                                          inserted at the given start offset.<br><br>
   *                                          NOTE: if $str is an array and no length is given, a length of 0 is used
   *                                          for every element (insert instead of replace-to-end).</p>
   *
   * @return string|string[] <p>The result string is returned. If $str is an array then an array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $num = count($str);

      // An empty input must give an empty result. Without this guard "array_pad()" (which never
      // shrinks) would still produce one element for a scalar replacement / offset, and
      // "array_fill(0, 0, ...)" fails before PHP 5.6.
      if ($num === 0) {
        return array();
      }

      // the replacement: one entry per input string
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // the offset: non-integer entries fall back to 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length: missing => 0, null entries => 0, other non-integer entries => $num
      if (!isset($length)) {
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call, element by element
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single input string only takes a single replacement: use the first entry
    if (is_array($replacement) === true) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // pure ASCII input is handed to the native function
    if (self::is_ascii($str)) {
      return ($length === null) ?
          substr_replace($str, $replacement, $offset) :
          substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice the character arrays
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6859
6860
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case sensitive.
   *
   * @param string $haystack <p>The string to shorten.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix; the unchanged haystack if it does not end with the
   *                needle (or the needle is empty); an empty string if the haystack is empty.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty suffix, or the haystack does not end with it
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    // a false result from "self::substr()" is mapped to an empty string
    return $remainder === false ? '' : (string)$remainder;
  }
6891
6892
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the character
   * (it already was upper case, or has no case), its lower-cased form is used instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // the regex below runs in UTF-8 mode, so strip invalid byte sequences first if requested
      $str = self::clean($str);
    }

    $swapCharCase = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      // unchanged by upper-casing => use the lower case variant
      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapCharCase, $str);
  }
6935
6936
  /**
   * Alias of "UTF8::to_ascii()": all arguments are passed through unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Forwarded as the "$unknown" argument.</p>
   * @param bool   $strict    <p>Forwarded as the "$strict" argument.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6953
6954
  /**
   * Alias of "UTF8::to_iso8859()": the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6969
6970
  /**
   * Alias of "UTF8::to_latin1()": the argument is passed through unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6985
6986
  /**
   * Alias of "UTF8::to_utf8()", called with its default for "$decodeHtmlEntityToUtf8".
   *
   * @see UTF8::to_utf8()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $utf8 = self::to_utf8($str);

    return $utf8;
  }
7001
7002
  /**
   * Convert a string into ASCII.
   *
   * Pure ASCII input is returned as-is. Otherwise the string is cleaned via "UTF8::clean()" and every
   * remaining non-ASCII character is replaced using per-"bank" lookup tables (code point >> 8) that are
   * loaded on demand via "self::getData()" and cached in a static variable for later calls.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // lookup tables, indexed by bank (code point >> 8), kept between calls
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $strTransliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);

        // "transliterator_transliterate()" returns false on failure:
        // keep the input in that case and let the table lookup below handle it
        if ($strTransliterated !== false) {
          $str = $strTransliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    // split into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset the code point for every character, otherwise the "!isset($ord)" check below
      // would see the value of the previously decoded character
      $ord = null;

      $ordC1 = ord($c[1]);

      // lead byte of a 2 byte sequence
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        // lead byte of a 3 byte sequence
        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          // lead byte of a 4 byte sequence
          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            // lead byte of a 5 byte sequence
            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              // lead byte of a 6 byte sequence
              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF, or a lead byte that matched none of the ranges above
      if ($ordC0 === 254 || $ordC0 === 255 || !isset($ord)) {
        $c = $unknown;
        continue;
      }

      // load the lookup table of this bank on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // position of the character inside its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no ASCII replacement known for this character
        $c = $unknown;
      }
    }
    // break the reference left behind by the loop
    unset($c);

    return implode('', $chars);
  }
7162
7163
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) !== true) {
      $str = (string)$str;

      if ($str === '') {
        return '';
      }

      return self::utf8_decode($str);
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($str as $key => $value) {
      /** @noinspection AlterInForeachInspection */
      /** @noinspection OffsetOperationsInspection */
      $str[$key] = self::to_iso8859($value);
    }

    return $str;
  }
7192
7193
  /**
   * Alias of "UTF8::to_iso8859()": the argument is passed through unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7206
7207
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively), keys are kept
    if (is_array($str) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // length in bytes ("strlen()" may be overloaded by mbstring)
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $byteCount = \mb_strlen($str, '8BIT');
    } else {
      $byteCount = strlen($str);
    }

    $result = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($pos = 0; $pos < $byteCount; $pos++) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        if (($lead & "\xC0") === "\x80") {
          // 0x80 - 0xBF outside of a sequence: needs conversion
          $result .= self::to_utf8_convert($lead);
        } else {
          // 7-bit byte: it doesn't need conversion
          $result .= $lead;
        }

        continue;
      }

      // number of continuation bytes this lead byte announces (0 => not a valid UTF-8 lead byte)
      if ($lead <= "\xDF") {
        $trailing = 1;
      } elseif ($lead <= "\xEF") {
        $trailing = 2;
      } elseif ($lead <= "\xF7") {
        $trailing = 3;
      } else {
        $trailing = 0;
      }

      // every announced byte must be in 0x80 - 0xBF, bytes past the end of the string count as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $trailing > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $trailing; $k++) {
        $next = ($pos + $k >= $byteCount) ? "\x00" : $str[$pos + $k];
        $looksLikeUtf8 = ($next >= "\x80" && $next <= "\xBF");
        $sequence .= $next;
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already: copy the whole sequence and skip its bytes
        $result .= $sequence;
        $pos += $trailing;
      } else {
        // not valid UTF8: convert the lead byte only, the following bytes are checked on their own
        $result .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences
    $result = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
7324
7325
  /**
   * Convert one non-UTF-8 byte into a UTF-8 sequence.
   *
   * Bytes listed in "self::$WIN1252_TO_UTF8" use the mapped value; every other byte is encoded as a
   * two byte sequence built from its byte value.
   *
   * @param string $int <p>A single byte (despite the name, the value is read with "ord()").</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int)
  {
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return '' . self::$WIN1252_TO_UTF8[$byteValue];
    }

    // lead byte: 0xC0 | (value / 64), trail byte: 0x80 | (lower six bits)
    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $trailByte = ($int & "\x3F") | "\x80";

    return '' . $leadByte . $trailByte;
  }
7345
7346
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * The native function could only be used for strings / chars that are 7-Bit only,
   * but the check for ASCII (7-Bit) costs more time than it would save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped. Without it (or with an empty
   *                      value), separator and "other" characters (\pZ, \pC) are stripped.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // explicit character list: strip it from both ends
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7374
7375
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is split into characters
      $str = self::clean($str);
    }

    // everything after the first character stays untouched (false from "self::substr()" => '')
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
7405
7406
  /**
   * Alias of "UTF8::ucfirst()": all arguments are passed through unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7421
7422
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // compare against the empty string instead of relying on truthiness: "0" is a valid input
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is split into words
      $str = self::clean($str);
    }

    // the native function is only an option without extra word-chars and without exceptions
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty parts only, a word like "0" must be kept
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7490
7491
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" sequences as hexadecimal html-entities, they are decoded in the loop below
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only used from PHP 5.4 on
    if (Bootup::is_php('5.4') === true) {
      $entityFlags = ENT_QUOTES | ENT_HTML5;
    } else {
      $entityFlags = ENT_QUOTES;
    }

    // decode until a pass changes nothing (or only once, if "$multi_decode" is off)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $entityFlags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
7541
7542
  /**
7543
   * Return a array with "urlencoded"-win1252 -> UTF-8
7544
   *
7545
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
7546
   *
7547
   * @return array
7548
   */
7549
  public static function urldecode_fix_win1252_chars()
7550
  {
7551
    return array(
7552
        '%20' => ' ',
7553
        '%21' => '!',
7554
        '%22' => '"',
7555
        '%23' => '#',
7556
        '%24' => '$',
7557
        '%25' => '%',
7558
        '%26' => '&',
7559
        '%27' => "'",
7560
        '%28' => '(',
7561
        '%29' => ')',
7562
        '%2A' => '*',
7563
        '%2B' => '+',
7564
        '%2C' => ',',
7565
        '%2D' => '-',
7566
        '%2E' => '.',
7567
        '%2F' => '/',
7568
        '%30' => '0',
7569
        '%31' => '1',
7570
        '%32' => '2',
7571
        '%33' => '3',
7572
        '%34' => '4',
7573
        '%35' => '5',
7574
        '%36' => '6',
7575
        '%37' => '7',
7576
        '%38' => '8',
7577
        '%39' => '9',
7578
        '%3A' => ':',
7579
        '%3B' => ';',
7580
        '%3C' => '<',
7581
        '%3D' => '=',
7582
        '%3E' => '>',
7583
        '%3F' => '?',
7584
        '%40' => '@',
7585
        '%41' => 'A',
7586
        '%42' => 'B',
7587
        '%43' => 'C',
7588
        '%44' => 'D',
7589
        '%45' => 'E',
7590
        '%46' => 'F',
7591
        '%47' => 'G',
7592
        '%48' => 'H',
7593
        '%49' => 'I',
7594
        '%4A' => 'J',
7595
        '%4B' => 'K',
7596
        '%4C' => 'L',
7597
        '%4D' => 'M',
7598
        '%4E' => 'N',
7599
        '%4F' => 'O',
7600
        '%50' => 'P',
7601
        '%51' => 'Q',
7602
        '%52' => 'R',
7603
        '%53' => 'S',
7604
        '%54' => 'T',
7605
        '%55' => 'U',
7606
        '%56' => 'V',
7607
        '%57' => 'W',
7608
        '%58' => 'X',
7609
        '%59' => 'Y',
7610
        '%5A' => 'Z',
7611
        '%5B' => '[',
7612
        '%5C' => '\\',
7613
        '%5D' => ']',
7614
        '%5E' => '^',
7615
        '%5F' => '_',
7616
        '%60' => '`',
7617
        '%61' => 'a',
7618
        '%62' => 'b',
7619
        '%63' => 'c',
7620
        '%64' => 'd',
7621
        '%65' => 'e',
7622
        '%66' => 'f',
7623
        '%67' => 'g',
7624
        '%68' => 'h',
7625
        '%69' => 'i',
7626
        '%6A' => 'j',
7627
        '%6B' => 'k',
7628
        '%6C' => 'l',
7629
        '%6D' => 'm',
7630
        '%6E' => 'n',
7631
        '%6F' => 'o',
7632
        '%70' => 'p',
7633
        '%71' => 'q',
7634
        '%72' => 'r',
7635
        '%73' => 's',
7636
        '%74' => 't',
7637
        '%75' => 'u',
7638
        '%76' => 'v',
7639
        '%77' => 'w',
7640
        '%78' => 'x',
7641
        '%79' => 'y',
7642
        '%7A' => 'z',
7643
        '%7B' => '{',
7644
        '%7C' => '|',
7645
        '%7D' => '}',
7646
        '%7E' => '~',
7647
        '%7F' => '',
7648
        '%80' => '`',
7649
        '%81' => '',
7650
        '%82' => '‚',
7651
        '%83' => 'ƒ',
7652
        '%84' => '„',
7653
        '%85' => '…',
7654
        '%86' => '†',
7655
        '%87' => '‡',
7656
        '%88' => 'ˆ',
7657
        '%89' => '‰',
7658
        '%8A' => 'Š',
7659
        '%8B' => '‹',
7660
        '%8C' => 'Œ',
7661
        '%8D' => '',
7662
        '%8E' => 'Ž',
7663
        '%8F' => '',
7664
        '%90' => '',
7665
        '%91' => '‘',
7666
        '%92' => '’',
7667
        '%93' => '“',
7668
        '%94' => '”',
7669
        '%95' => '•',
7670
        '%96' => '–',
7671
        '%97' => '—',
7672
        '%98' => '˜',
7673
        '%99' => '™',
7674
        '%9A' => 'š',
7675
        '%9B' => '›',
7676
        '%9C' => 'œ',
7677
        '%9D' => '',
7678
        '%9E' => 'ž',
7679
        '%9F' => 'Ÿ',
7680
        '%A0' => '',
7681
        '%A1' => '¡',
7682
        '%A2' => '¢',
7683
        '%A3' => '£',
7684
        '%A4' => '¤',
7685
        '%A5' => '¥',
7686
        '%A6' => '¦',
7687
        '%A7' => '§',
7688
        '%A8' => '¨',
7689
        '%A9' => '©',
7690
        '%AA' => 'ª',
7691
        '%AB' => '«',
7692
        '%AC' => '¬',
7693
        '%AD' => '',
7694
        '%AE' => '®',
7695
        '%AF' => '¯',
7696
        '%B0' => '°',
7697
        '%B1' => '±',
7698
        '%B2' => '²',
7699
        '%B3' => '³',
7700
        '%B4' => '´',
7701
        '%B5' => 'µ',
7702
        '%B6' => '¶',
7703
        '%B7' => '·',
7704
        '%B8' => '¸',
7705
        '%B9' => '¹',
7706
        '%BA' => 'º',
7707
        '%BB' => '»',
7708
        '%BC' => '¼',
7709
        '%BD' => '½',
7710
        '%BE' => '¾',
7711
        '%BF' => '¿',
7712
        '%C0' => 'À',
7713
        '%C1' => 'Á',
7714
        '%C2' => 'Â',
7715
        '%C3' => 'Ã',
7716
        '%C4' => 'Ä',
7717
        '%C5' => 'Å',
7718
        '%C6' => 'Æ',
7719
        '%C7' => 'Ç',
7720
        '%C8' => 'È',
7721
        '%C9' => 'É',
7722
        '%CA' => 'Ê',
7723
        '%CB' => 'Ë',
7724
        '%CC' => 'Ì',
7725
        '%CD' => 'Í',
7726
        '%CE' => 'Î',
7727
        '%CF' => 'Ï',
7728
        '%D0' => 'Ð',
7729
        '%D1' => 'Ñ',
7730
        '%D2' => 'Ò',
7731
        '%D3' => 'Ó',
7732
        '%D4' => 'Ô',
7733
        '%D5' => 'Õ',
7734
        '%D6' => 'Ö',
7735
        '%D7' => '×',
7736
        '%D8' => 'Ø',
7737
        '%D9' => 'Ù',
7738
        '%DA' => 'Ú',
7739
        '%DB' => 'Û',
7740
        '%DC' => 'Ü',
7741
        '%DD' => 'Ý',
7742
        '%DE' => 'Þ',
7743
        '%DF' => 'ß',
7744
        '%E0' => 'à',
7745
        '%E1' => 'á',
7746
        '%E2' => 'â',
7747
        '%E3' => 'ã',
7748
        '%E4' => 'ä',
7749
        '%E5' => 'å',
7750
        '%E6' => 'æ',
7751
        '%E7' => 'ç',
7752
        '%E8' => 'è',
7753
        '%E9' => 'é',
7754
        '%EA' => 'ê',
7755
        '%EB' => 'ë',
7756
        '%EC' => 'ì',
7757
        '%ED' => 'í',
7758
        '%EE' => 'î',
7759
        '%EF' => 'ï',
7760
        '%F0' => 'ð',
7761
        '%F1' => 'ñ',
7762
        '%F2' => 'ò',
7763
        '%F3' => 'ó',
7764
        '%F4' => 'ô',
7765
        '%F5' => 'õ',
7766
        '%F6' => 'ö',
7767
        '%F7' => '÷',
7768
        '%F8' => 'ø',
7769
        '%F9' => 'ù',
7770
        '%FA' => 'ú',
7771
        '%FB' => 'û',
7772
        '%FC' => 'ü',
7773
        '%FD' => 'ý',
7774
        '%FE' => 'þ',
7775
        '%FF' => 'ÿ',
7776
    );
7777
  }
7778
7779
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * <p>
   * The input is first passed through "UTF8::to_utf8()", then every sequence listed in
   * self::$UTF8_TO_WIN1252 is replaced by its mapped value. Afterwards the buffer is walked
   * byte by byte: 2-byte sequences are collapsed into one byte when the decoded code point
   * is below 256 (otherwise "?"), 3-byte and 4-byte sequences become "?", and every other
   * byte is copied unchanged.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the key/value lists of the replacement table are built once and re-used on later calls
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // the loop below works on bytes, so the byte length is needed
    // (presumably strlen() counts characters when mbstring overloading is active - hence '8BIT')
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    // $i is the read position, $j the write position; $j never overtakes $i,
    // so the buffer can be rewritten in place
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      switch ($str[$i] & "\xF0") {
        case "\xC0":
        case "\xD0":
          // lead byte of a 2-byte sequence
          $lead = $str[$i];
          ++$i;
          // A lead byte at the very end of the buffer has no continuation byte.
          // Use a zero byte instead of reading past the end: same result as before,
          // but without the "Uninitialized string offset" notice/warning.
          $continuation = $i < $len ? $str[$i] : "\x00";
          $c = (ord($lead & "\x1F") << 6) | ord($continuation & "\x3F");
          $str[$j] = $c < 256 ? self::chr_and_parse_int($c) : '?';
          break;

        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          // lead byte of a 4-byte sequence: skip one byte more than in the 3-byte case
          ++$i;
          // no break - intentional fall-through into the 3-byte handling
        case "\xE0":
          // 3-byte (and 4-byte) sequences are replaced by "?"
          $str[$j] = '?';
          $i += 2;
          break;

        default:
          // every other byte is copied as it is
          $str[$j] = $str[$i];
      }
    }

    // only the first $j bytes hold the result, the rest is left over from the in-place rewrite
    return (string)self::substr($str, 0, $j, '8BIT');
  }
7842
7843
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>
   * The string is converted with the native "utf8_encode()" function. If the converted
   * string contains the byte 0xC2, the sequences listed in self::$CP1252_TO_UTF8 are
   * replaced by their mapped values afterwards.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    // search / replace lists of the mapping table, built on first use only
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    $encoded = \utf8_encode($input);
    if ($encoded === false) {
      return '';
    }
    $encoded = (string)$encoded;

    // without a 0xC2 byte the mapping step is skipped
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $REPLACE_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
  }
7879
7880
  /**
   * Fixes utf8-win1252 chars by delegating to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()" for the given input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7893
7894
  /**
   * Returns the table of UTF-8 whitespace characters stored in self::$WHITESPACE_TABLE.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E. [email protected]
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7910
7911
  /**
   * Limits the number of words in a string.
   *
   * <p>
   * Returns an empty string for empty input or for a limit below 1. If the string holds
   * no more than $limit words it is returned unchanged; otherwise the leading $limit words
   * are kept, right-trimmed, and $strAddOn is appended.
   * </p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $limit = (int)$limit;
    if ($limit < 1) {
      return '';
    }

    // optional leading whitespace, followed by 1 to $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // nothing matched: hand the input back untouched
    if (isset($matches[0]) === false) {
      return $str;
    }

    $head = $matches[0];

    // the match already covers the whole string, so there is nothing to cut off
    if (self::strlen($str) === self::strlen($head)) {
      return $str;
    }

    return self::rtrim($head) . $strAddOn;
  }
7947
7948
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>
   * The string is turned into a one-byte-per-element mask ("?" for every element returned
   * by "UTF8::split()", " " for spaces, "#" for every existing $break). The native
   * "wordwrap()" function then wraps that mask, and the "#" positions of its result decide
   * where $break is written into the output.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    // an empty input string or an empty break string both yield an empty result
    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars holds the real pieces, $mask one placeholder byte for each of them
    $chars = array();
    $mask = '';

    foreach (explode($break, $str) as $lineNo => $line) {
      // every line after the first one is preceded by the break it was split on
      if ($lineNo > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $next = 0;    // index of the next unused entry in $chars
    $cursor = -1; // position in $mask that was handled last

    $markerPos = self::strpos($mask, '#', 0);
    while ($markerPos !== false) {
      // copy everything that lies in front of the current "#" marker
      ++$cursor;
      while ($cursor < $markerPos) {
        $wrapped .= $chars[$next];
        unset($chars[$next]);
        ++$next;
        ++$cursor;
      }

      // the marker stands for an existing break or for a space: that piece is dropped
      if ($break === $chars[$next] || ' ' === $chars[$next]) {
        unset($chars[$next]);
        ++$next;
      }

      $wrapped .= $break;

      $markerPos = self::strpos($mask, '#', $markerPos + 1);
    }

    // whatever is left behind the last marker is appended unchanged
    return $wrapped . implode('', $chars);
  }
8015
8016
  /**
   * Returns the array of Unicode White Space characters stored in self::$WHITESPACE.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8025
8026
}
8027