Completed
Push — master ( b25a6d...3859ba )
by Lars
03:21
created

UTF8::substr_replace()   C

Complexity

Conditions 17
Paths 27

Size

Total Lines 77
Code Lines 46

Duplication

Lines 20
Ratio 25.97 %

Code Coverage

Tests 47
CRAP Score 17.1392

Importance

Changes 0
Metric Value
dl 20
loc 77
ccs 47
cts 51
cp 0.9216
rs 5.1861
c 0
b 0
f 0
cc 17
eloc 46
nc 27
nop 4
crap 17.1392

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    // HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Constructor.
   *
   * Creating an instance runs the environment detection
   * (UTF8::checkForSupport()), which fills the internal support table.
   */
  public function __construct()
  {
    // the class is final, so late static binding resolves to this very class
    static::checkForSupport();
  }
820
821
  /**
   * Fetch the single character found at a given character position,
   * comparable to "$str[1]" but multi-byte aware.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for empty input
   *                or a negative position.</p>
   */
  public static function access($str, $pos)
  {
    $input = (string)$str;

    // nothing to look up in an empty string
    if ($input === '') {
      return '';
    }

    $index = (int)$pos;

    // negative positions are not supported here
    return $index < 0 ? '' : (string)self::substr($input, $index, 1);
  }
845
846
  /**
   * Make sure the given string starts with the UTF-8 BOM.
   *
   * INFO: A string that already carries a BOM is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The input string, prefixed with the UTF-8 BOM if it had none.</p>
   */
  public static function add_bom_to_string($str)
  {
    // already marked -> nothing to do
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
   * Convert a binary string (a sequence of "0" / "1" digits) into the bytes it encodes.
   *
   * The digits are interpreted as one big-endian number: leading zero digits
   * carry no value and the remaining digits are right-aligned to whole bytes,
   * e.g. "1100001" and "01100001" both give "a". Input that contains no "1"
   * digit gives a single NUL byte.
   *
   * The conversion works octet by octet. Going through base_convert() and
   * pack('H*') would lose precision on longer input and would pad an
   * odd number of hex digits on the wrong side.
   *
   * @param mixed $bin <p>String of "0" / "1" digits; other characters are ignored.</p>
   *
   * @return string <p>The decoded bytes, or an empty string if the input has no first offset.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // ignore everything that is not a binary digit, as base_convert() would
    $bits = (string)preg_replace('/[^01]/', '', (string)$bin);

    // numeric semantics: leading zeros do not contribute to the value
    $bits = ltrim($bits, '0');
    if ($bits === '') {
      return "\x00";
    }

    // right-align to a whole number of bytes
    $missing = (8 - (strlen($bits) % 8)) % 8;
    $bits = str_repeat('0', $missing) . $bits;

    $bytes = '';
    foreach (str_split($bits, 8) as $octet) {
      $bytes .= chr(bindec($octet));
    }

    return $bytes;
  }
879
880
  /**
   * Get the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: see UTF8::$BOM for other marks, e.g. the UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
891
892
  /**
   * Alias of UTF8::chr_map(): the arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback
   * @param string       $str
   *
   * @return array
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
   * Detect once which UTF-8 related features the current PHP environment offers
   * and remember the findings in self::$SUPPORT.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    // the detection already ran -> keep the stored results
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // "&&" short-circuits, so the constant is only read when it is defined
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    $canListTransliterators = (
        self::$SUPPORT['intl'] === true
        &&
        function_exists('transliterator_list_ids') === true
    );
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? transliterator_list_ids()
        : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * Results are kept in a static cache, keyed by code point and encoding.
   * "\IntlChar" is used when available; otherwise the UTF-8 bytes are built
   * by hand, in which case the code point must be an integer within
   * 0 .. 0x10FFFF.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $cacheKey = $code_point . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $str = \IntlChar::chr($code_point);

      // keep a failure (null) as null instead of converting it into an empty string
      if ($str !== null && $encoding !== 'UTF-8') {
        $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
      }

      $CHAR_CACHE[$cacheKey] = $str;

      return $str;
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    if ((int)$code_point !== $code_point) {
      $CHAR_CACHE[$cacheKey] = null;

      return null;
    }

    // values outside of the Unicode range cannot be encoded as valid UTF-8
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      $CHAR_CACHE[$cacheKey] = null;

      return null;
    }

    if ($code_point <= 0x7F) {
      // 1 byte: U+0000 - U+007F
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: U+0080 - U+07FF
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: U+0800 - U+FFFF
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: U+10000 - U+10FFFF
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1033
1034
  /**
   * Casts the given value to an integer and returns the single byte with that value.
   *
   * @param int $int <p>The byte value; it is cast to int before being passed to chr().</p>
   *
   * @return string <p>The one-byte string returned by chr().</p>
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1043
1044
  /**
   * Runs a callback over every character of a string and collects the results.
   *
   * @param string|array $callback <p>The callback function; it receives one character per call.</p>
   * @param string       $str      <p>UTF-8 string; it is split via UTF8::split() before mapping.</p>
   *
   * @return array <p>The outcome of callback, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1058
1059
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The byte length of each character; an empty array for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $sizes = array();

    // Measure every character returned by UTF8::split() in bytes ('8BIT').
    foreach (self::split($str) as $index => $character) {
      $sizes[$index] = self::strlen($character, '8BIT');
    }

    return $sizes;
  }
1086
1087
  /**
   * Get a decimal code representation of a specific character.
   *
   * The leading byte decides how many bytes belong to the character; the payload
   * bits of the leading byte and of each following byte are then combined.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value; 0 for an empty string. If the input is shorter than its
   *             leading byte announces, only the bytes that are present are combined.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // An empty string has no leading byte: reading $char[0] would raise an
    // "Uninitialized string offset" error, so bail out early.
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // Truncated sequence: stop instead of reading past the end of the string.
      if (!isset($char[$i])) {
        break;
      }

      // 10xxxxxx
      $code = ($code << 6) + (self::ord($char[$i]) & ~0x80);
    }

    return $code;
  }
1126
1127
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Prefix handed to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx; an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $input = (string)$char;

    if ($input === '') {
      return '';
    }

    // The literal entity "&#0;" is replaced by an empty string before the lookup.
    $lookup = ($input === '&#0;') ? '' : $input;

    return self::int_to_hex(self::ord($lookup), $pfix);
  }
1149
1150
  /**
   * Alias for "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the character.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1163
1164
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character(s).
   *
   * The chunks come from UTF8::split() and are glued together with implode(), so
   * $end is placed between chunks.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1177
1178
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed byte sequences; stray bytes match the
    // other groups and vanish because only '$1' is written back.
    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // Always applied: byte filter, then replacement-character removal, then invisible characters.
    $str = self::remove_invisible_characters(
        self::replace_diamond_question_mark(
            preg_replace($validSequences, '$1', $str),
            ''
        )
    );

    // Optional steps, in this fixed order.
    if (true === $normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if (true === $remove_bom) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1226
1227
  /**
   * Cleans up a string so that only printable UTF-8 chars remain and fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $repaired = self::fix_simple_utf8($str);

    // clean() is called with:
    //   remove BOM                  => true
    //   normalize whitespace        => true
    //   normalize MS Word chars     => false
    //   keep non-breaking-spaces    => true
    // It also removes non UTF-8 symbols, the diamond question mark (�)
    // and invisible characters (e.g. "\0").
    return (string)self::clean($repaired, true, true, false, true);
  }
1254
1255
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $utf8Class = '\\voku\\helper\\UTF8';

    // A plain string is first broken into its characters; anything else is mapped as given.
    $chars = (is_string($arg) === true) ? self::split($arg) : $arg;

    $points = array_map(array($utf8Class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // Second pass: format every code point through UTF8::int_to_hex().
    return array_map(array($utf8Class, 'int_to_hex'), $points);
  }
1292
1293
  /**
   * Returns count of characters used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // Split into chunks of one character each, then tally identical entries.
    $characters = self::split($str, 1, $cleanUtf8);

    return array_count_values($characters);
  }
1306
1307
  /**
   * Converts an int-value into an UTF-8 character.
   *
   * The value is wrapped into a numeric entity ("&#<int>;") and decoded via
   * UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true.
    $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1324
1325
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call this function also on a UTF-8 String and you don't mess the string.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the unchanged input when nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // Nothing to do without both a string and a target encoding.
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);
    $forced = ($force === true);

    // Detection failed: keep the input untouched.
    if ($detected === false) {
      return $str;
    }

    // Not forced and already in the target encoding: keep the input untouched.
    if (!$forced && $detected === $encoding) {
      return $str;
    }

    // Dedicated converter for UTF-8 targets.
    if (
        $encoding === 'UTF-8'
        &&
        ($forced || in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true))
    ) {
      return self::to_utf8($str);
    }

    // Dedicated converter for ISO-8859-1 targets.
    if (
        $encoding === 'ISO-8859-1'
        &&
        ($forced || in_array($detected, array('ISO-8859-1', 'UTF-8'), true))
    ) {
      return self::to_iso8859($str);
    }

    // Every other target goes through mb_convert_encoding(); warn when mbstring is flagged as unavailable.
    $needsMbstring = ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252');
    if ($needsMbstring && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // A falsy conversion result falls back to the original string.
    return $converted ? $converted : $str;
  }
1416
1417
  /**
1418
   * Reads entire file into a string.
1419
   *
1420
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1421
   *
1422
   * @link http://php.net/manual/en/function.file-get-contents.php
1423
   *
1424
   * @param string        $filename      <p>
1425
   *                                     Name of the file to read.
1426
   *                                     </p>
1427
   * @param int|false     $flags         [optional] <p>
1428
   *                                     Prior to PHP 6, this parameter is called
1429
   *                                     use_include_path and is a bool.
1430
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1431
   *                                     to trigger include path
1432
   *                                     search.
1433
   *                                     </p>
1434
   *                                     <p>
1435
   *                                     The value of flags can be any combination of
1436
   *                                     the following flags (with some restrictions), joined with the
1437
   *                                     binary OR (|)
1438
   *                                     operator.
1439
   *                                     </p>
1440
   *                                     <p>
1441
   *                                     <table>
1442
   *                                     Available flags
1443
   *                                     <tr valign="top">
1444
   *                                     <td>Flag</td>
1445
   *                                     <td>Description</td>
1446
   *                                     </tr>
1447
   *                                     <tr valign="top">
1448
   *                                     <td>
1449
   *                                     FILE_USE_INCLUDE_PATH
1450
   *                                     </td>
1451
   *                                     <td>
1452
   *                                     Search for filename in the include directory.
1453
   *                                     See include_path for more
1454
   *                                     information.
1455
   *                                     </td>
1456
   *                                     </tr>
1457
   *                                     <tr valign="top">
1458
   *                                     <td>
1459
   *                                     FILE_TEXT
1460
   *                                     </td>
1461
   *                                     <td>
1462
   *                                     As of PHP 6, the default encoding of the read
1463
   *                                     data is UTF-8. You can specify a different encoding by creating a
1464
   *                                     custom context or by changing the default using
1465
   *                                     stream_default_encoding. This flag cannot be
1466
   *                                     used with FILE_BINARY.
1467
   *                                     </td>
1468
   *                                     </tr>
1469
   *                                     <tr valign="top">
1470
   *                                     <td>
1471
   *                                     FILE_BINARY
1472
   *                                     </td>
1473
   *                                     <td>
1474
   *                                     With this flag, the file is read in binary mode. This is the default
1475
   *                                     setting and cannot be used with FILE_TEXT.
1476
   *                                     </td>
1477
   *                                     </tr>
1478
   *                                     </table>
1479
   *                                     </p>
1480
   * @param resource|null $context       [optional] <p>
1481
   *                                     A valid context resource created with
1482
   *                                     stream_context_create. If you don't need to use a
1483
   *                                     custom context, you can skip this parameter by &null;.
1484
   *                                     </p>
1485
   * @param int|null      $offset        [optional] <p>
1486
   *                                     The offset where the reading starts.
1487
   *                                     </p>
1488
   * @param int|null      $maxLength     [optional] <p>
1489
   *                                     Maximum length of data read. The default is to read until end
1490
   *                                     of file is reached.
1491
   *                                     </p>
1492
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
1493
   *
1494
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1495 4
   *                                     or pdf, because they used non default utf-8 chars</p>
1496
   *
1497
   * @return string <p>The function returns the read data or false on failure.</p>
1498 4
   */
1499 4
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // Normalize the inputs.
    // NOTE(review): FILTER_SANITIZE_STRING is deprecated as of PHP 8.1 - confirm the supported PHP range.
    $timeoutSeconds = (int)$timeout;
    $path = filter_var($filename, FILTER_SANITIZE_STRING);

    // Without a caller-supplied context, build one that carries the HTTP timeout.
    if ($context === null && $timeoutSeconds) {
      $httpOptions = array('timeout' => $timeoutSeconds);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    // Any falsy $flags value is passed on as boolean false, a missing offset as 0.
    $flagsArgument = $flags ? $flags : false;
    $offsetArgument = ($offset === null) ? 0 : $offset;

    // The length argument is only forwarded when it is a real integer.
    if (is_int($maxLength) === true) {
      $contents = file_get_contents($path, $flagsArgument, $context, $offsetArgument, $maxLength);
    } else {
      $contents = file_get_contents($path, $flagsArgument, $context, $offsetArgument);
    }

    // Read failure: hand the false through to the caller.
    if ($contents === false) {
      return false;
    }

    if ($convertToUtf8 !== true) {
      return $contents;
    }

    // Convert to UTF-8 (not forced), then run the cleanup pass.
    $utf8 = self::encode('UTF-8', $contents, false);

    return self::cleanup($utf8);
  }
1542
1543
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also when the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // file_get_contents() returns false when the file cannot be read; report
    // that as "no BOM" instead of passing a boolean on to the string check.
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1554
1555
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively, strings are normalized and
   * every other type is returned unchanged.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>Form passed to \Normalizer (default 4, i.e. NFC).</p>
   * @param string $leading_combining  <p>Prefix added when the result starts with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $property => $value) {
        $var->{$property} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // ASCII-only strings are returned without normalization.
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // Non-empty marker: the string was already in the requested form.
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // An unusable normalizer result falls back to a forced UTF-8 re-encode.
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    // Prevent leading combining chars
    // for NFC-safe concatenations.
    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      $var = $leading_combining . $var;
    }

    return $var;
  }
1618
1619
  /**
1620
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1621
   *
1622
   * Gets a specific external variable by name and optionally filters it
1623
   *
1624
   * @link  http://php.net/manual/en/function.filter-input.php
1625
   *
1626
   * @param int    $type          <p>
1627
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1628
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1629
   *                              <b>INPUT_ENV</b>.
1630
   *                              </p>
1631
   * @param string $variable_name <p>
1632
   *                              Name of a variable to get.
1633
   *                              </p>
1634
   * @param int    $filter        [optional] <p>
1635
   *                              The ID of the filter to apply. The
1636
   *                              manual page lists the available filters.
1637
   *                              </p>
1638
   * @param mixed  $options       [optional] <p>
1639
   *                              Associative array of options or bitwise disjunction of flags. If filter
1640
   *                              accepts options, flags can be provided in "flags" field of array.
1641
   *                              </p>
1642
   *
1643
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1644
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1645
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1646
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1647
   * @since 5.2.0
1648
   */
1649 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded when the caller actually passed a fourth argument.
    $value = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // Run the result through UTF8::filter() with its default settings.
    return self::filter($value);
  }
1659
1660
  /**
1661
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1662
   *
1663
   * Gets external variables and optionally filters them
1664
   *
1665
   * @link  http://php.net/manual/en/function.filter-input-array.php
1666
   *
1667
   * @param int   $type       <p>
1668
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1669
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1670
   *                          <b>INPUT_ENV</b>.
1671
   *                          </p>
1672
   * @param mixed $definition [optional] <p>
1673
   *                          An array defining the arguments. A valid key is a string
1674
   *                          containing a variable name and a valid value is either a filter type, or an array
1675
   *                          optionally specifying the filter, flags and options. If the value is an
1676
   *                          array, valid keys are filter which specifies the
1677
   *                          filter type,
1678
   *                          flags which specifies any flags that apply to the
1679
   *                          filter, and options which specifies any options that
1680
   *                          apply to the filter. See the example below for a better understanding.
1681
   *                          </p>
1682
   *                          <p>
1683
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1684
   *                          input array are filtered by this filter.
1685
   *                          </p>
1686
   * @param bool  $add_empty  [optional] <p>
1687
   *                          Add missing keys as <b>NULL</b> to the return value.
1688
   *                          </p>
1689
   *
1690
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1691
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1692
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1693
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1694
   * fails.
1695
   * @since 5.2.0
1696
   */
1697 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // With only $type given, the native function is called without definition and add_empty.
    $values = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // Run the result through UTF8::filter() with its default settings.
    return self::filter($values);
  }
1707
1708
  /**
1709
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1710
   *
1711
   * Filters a variable with a specified filter
1712
   *
1713
   * @link  http://php.net/manual/en/function.filter-var.php
1714
   *
1715
   * @param mixed $variable <p>
1716
   *                        Value to filter.
1717
   *                        </p>
1718
   * @param int   $filter   [optional] <p>
1719
   *                        The ID of the filter to apply. The
1720
   *                        manual page lists the available filters.
1721
   *                        </p>
1722
   * @param mixed $options  [optional] <p>
1723
   *                        Associative array of options or bitwise disjunction of flags. If filter
1724
   *                        accepts options, flags can be provided in "flags" field of array. For
1725
   *                        the "callback" filter, callable type should be passed. The
1726
   *                        callback must accept one argument, the value to be filtered, and return
1727
   *                        the value after filtering/sanitizing it.
1728
   *                        </p>
1729
   *                        <p>
1730
   *                        <code>
1731
   *                        // for filters that accept options, use this format
1732
   *                        $options = array(
1733
   *                        'options' => array(
1734
   *                        'default' => 3, // value to return if the filter fails
1735
   *                        // other options here
1736
   *                        'min_range' => 0
1737
   *                        ),
1738
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1739
   *                        );
1740
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1741
   *                        // for filter that only accept flags, you can pass them directly
1742
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1743
   *                        // for filter that only accept flags, you can also pass as an array
1744
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1745
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1746
   *                        // callback validate filter
1747
   *                        function foo($value)
1748
   *                        {
1749
   *                        // Expected format: Surname, GivenNames
1750
   *                        if (strpos($value, ", ") === false) return false;
1751
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1752
   *                        $empty = (empty($surname) || empty($givennames));
1753
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1754
   *                        if ($empty || $notstrings) {
1755
   *                        return false;
1756
   *                        } else {
1757
   *                        return $value;
1758
   *                        }
1759
   *                        }
1760
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1761
   *                        </code>
1762
   *                        </p>
1763 1
   *
1764
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1765 1
   * @since 5.2.0
1766 1
   */
1767 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Hand $options to the native filter_var() only when the caller really
    // supplied a third argument; otherwise PHP applies its own default.
    $filtered = func_num_args() < 3
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // The native result is passed through self::filter() before it is returned.
    return self::filter($filtered);
  }
1777
1778
  /**
1779
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1780
   *
1781
   * Gets multiple variables and optionally filters them
1782
   *
1783
   * @link  http://php.net/manual/en/function.filter-var-array.php
1784
   *
1785
   * @param array $data       <p>
1786
   *                          An array with string keys containing the data to filter.
1787
   *                          </p>
1788
   * @param mixed $definition [optional] <p>
1789
   *                          An array defining the arguments. A valid key is a string
1790
   *                          containing a variable name and a valid value is either a
1791
   *                          filter type, or an
1792
   *                          array optionally specifying the filter, flags and options.
1793
   *                          If the value is an array, valid keys are filter
1794
   *                          which specifies the filter type,
1795
   *                          flags which specifies any flags that apply to the
1796
   *                          filter, and options which specifies any options that
1797
   *                          apply to the filter. See the example below for a better understanding.
1798
   *                          </p>
1799
   *                          <p>
1800
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1801
   *                          input array are filtered by this filter.
1802
   *                          </p>
1803
   * @param bool  $add_empty  [optional] <p>
1804
   *                          Add missing keys as <b>NULL</b> to the return value.
1805
   *                          </p>
1806
   *
1807
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1808 1
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1809
   * the variable is not set.
1810 1
   * @since 5.2.0
1811 1
   */
1812 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a
    // definition; otherwise definition and add_empty are forwarded as given.
    $filtered = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // The native result is passed through self::filter() before it is returned.
    return self::filter($filtered);
  }
1822
1823
  /**
1824
   * Check if the number of unicode characters are not more than the specified integer.
1825
   *
1826
   * @param string $str      The original string to be checked.
1827 1
   * @param int    $box_size The size in number of chars to be checked against string.
1828
   *
1829 1
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1830
   */
1831
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared against the box size.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1835
1836
  /**
1837
   * Try to fix simple broken UTF-8 strings.
1838
   *
1839
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1840
   *
1841
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1842
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1843
   * See: http://en.wikipedia.org/wiki/Windows-1252
1844
   *
1845 27
   * @param string $str <p>The input string</p>
1846
   *
1847
   * @return string
1848 27
   */
1849 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    // Search / replace lists derived from self::$BROKEN_UTF8_FIX, built once per request.
    static $brokenSequences = null;
    static $fixedSequences = null;

    $str = (string)$str;

    // Nothing to repair in an empty string.
    if ($str === '') {
      return '';
    }

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $str);
  }
1868
1869
  /**
1870
   * Fix a double (or multiple) encoded UTF8 string.
1871
   *
1872
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1873 1
   *
1874
   * @return string|string[] <p>Will return the fixed input-"array" or
1875 1
   *                         the fixed input-"string".</p>
1876
   */
1877
  public static function fix_utf8($str)
  {
    // Arrays are handled element by element (recursively), keeping keys and order.
    if (is_array($str) === true) {
      $fixed = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // Decode and re-encode repeatedly until a pass no longer changes the value.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;
      $decoded = self::utf8_decode($previous);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1901
1902
  /**
1903
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1904
   *
1905 1
   * @param string $char
1906
   *
1907 1
   * @return string <p>'RTL' or 'LTR'</p>
1908
   */
1909
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      // Direction code as returned by the "IntlChar"-Class.
      $intlDirection = \IntlChar::charDirection($char);

      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // Any other direction code falls through to the manual lookup below.
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of 0x5be..0x10b7f is reported as left-to-right.
    $insideScannedRange = (0x5be <= $c && 0x10b7f >= $c);
    if (!$insideScannedRange) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // lower block: 0x5be .. 0x85e
      $singleCodePoints = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $codePointRanges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // upper block: 0xfb1d .. 0x10b7f
      $singleCodePoints = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $codePointRanges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two blocks (and not 0x200f)
      return 'LTR';

    }

    // exact matches are compared strictly, ranges inclusively on both ends
    if (in_array($c, $singleCodePoints, true)) {
      return 'RTL';
    }

    foreach ($codePointRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2023
2024
  /**
2025
   * Get data from "/data/*.php".
2026
   *
2027 4
   * @param string $file
2028
   *
2029 4
   * @return bool|string|array|int <p>Will return false on error.</p>
2030 4
   */
2031
  private static function getData($file)
  {
    // Data files live next to this class: "<dir of this file>/data/<name>.php".
    $path = __DIR__ . '/data/' . $file . '.php';

    // A missing data file is reported as false.
    if (!file_exists($path)) {
      return false;
    }

    // The data is whatever the included file returns.
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2041
2042
  /**
2043
   * Check for php-support.
2044
   *
2045
   * @param string|null $key
2046
   *
2047 7
   * @return mixed <p>Return the full support-"array", if $key === null<br>
2048
   *               return bool-value, if $key is used and available<br>
2049 7
   *               otherwise return null</p>
2050
   */
2051
  public static function getSupportInfo($key = null)
  {
    // Fill the support table lazily on first use.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Without a key the complete support table is returned.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown keys yield null.
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2067
2068
  /**
2069
   * alias for "UTF8::string_has_bom()"
2070
   *
2071
   * @see UTF8::string_has_bom()
2072
   *
2073
   * @param string $str
2074
   *
2075
   * @return bool
2076
   *
2077
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2078
   */
2079
  public static function hasBom($str)
  {
    // Deprecated alias: the check itself is done by UTF8::string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2083
2084
  /**
2085
   * Converts a hexadecimal-value into an UTF-8 character.
2086
   *
2087 2
   * @param string $hexdec <p>The hexadecimal value.</p>
2088
   *
2089 2
   * @return string|false <p>One single UTF-8 character.</p>
2090
   */
2091
  public static function hex_to_chr($hexdec)
  {
    // hexdec() yields the numeric value, which decimal_to_chr() turns into a character.
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2095
2096
  /**
2097
   * Converts hexadecimal U+xxxx code point representation to integer.
2098
   *
2099
   * INFO: opposite to UTF8::int_to_hex()
2100
   *
2101 1
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2102
   *
2103 1
   * @return int|false <p>The code point, or false on failure.</p>
2104
   */
2105 1
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    // An empty input is not a code point.
    if (!isset($hexDec[0])) {
      return false;
    }

    // Accepted forms: an optional "\u" or "U+" prefix followed by 4 to 6 hex digits
    // (case-insensitive). The digit class is restricted to real hexadecimal digits:
    // a wider class such as [a-z0-9] lets input like "zzzz" or "abcz" match, and
    // intval(..., 16) would then return 0 or a truncated value instead of the
    // documented false.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2119
2120
  /**
2121
   * alias for "UTF8::html_entity_decode()"
2122
   *
2123
   * @see UTF8::html_entity_decode()
2124
   *
2125
   * @param string $str
2126
   * @param int    $flags
2127 1
   * @param string $encoding
2128
   *
2129 1
   * @return string
2130
   */
2131
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Pure alias: all arguments are forwarded unchanged to UTF8::html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2135
2136
  /**
2137
   * Converts a UTF-8 string to a series of HTML numbered entities.
2138
   *
2139
   * INFO: opposite to UTF8::html_decode()
2140
   *
2141
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2142
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2143 2
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2144
   *
2145
   * @return string <p>HTML numbered entities.</p>
2146 2
   */
2147
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // When ASCII is kept, the converted range starts at 0x80 so that 0x00-0x7f stay untouched.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A convmap is a flat list of groups of exactly four integers:
      // (first code point, last code point, offset, mask).
      // It must not carry a stray fifth element: PHP 8 rejects a map whose
      // size is not a multiple of four with a ValueError.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without "mb_encode_numericentity()": encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2185
2186
  /**
2187
   * UTF-8 version of html_entity_decode()
2188
   *
2189
   * The reason we are not using html_entity_decode() by itself is because
2190
   * while it is not technically correct to leave out the semicolon
2191
   * at the end of an entity most browsers will still interpret the entity
2192
   * correctly. html_entity_decode() does not convert entities without
2193
   * semicolons, so we are left with our own little solution here. Bummer.
2194
   *
2195
   * Convert all HTML entities to their applicable characters
2196
   *
2197
   * INFO: opposite to UTF8::html_encode()
2198
   *
2199
   * @link http://php.net/manual/en/function.html-entity-decode.php
2200
   *
2201
   * @param string $str      <p>
2202
   *                         The input string.
2203
   *                         </p>
2204
   * @param int    $flags    [optional] <p>
2205
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2206
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2207
   *                         <table>
2208
   *                         Available <i>flags</i> constants
2209
   *                         <tr valign="top">
2210
   *                         <td>Constant Name</td>
2211
   *                         <td>Description</td>
2212
   *                         </tr>
2213
   *                         <tr valign="top">
2214
   *                         <td><b>ENT_COMPAT</b></td>
2215
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2216
   *                         </tr>
2217
   *                         <tr valign="top">
2218
   *                         <td><b>ENT_QUOTES</b></td>
2219
   *                         <td>Will convert both double and single quotes.</td>
2220
   *                         </tr>
2221
   *                         <tr valign="top">
2222
   *                         <td><b>ENT_NOQUOTES</b></td>
2223
   *                         <td>Will leave both double and single quotes unconverted.</td>
2224
   *                         </tr>
2225
   *                         <tr valign="top">
2226
   *                         <td><b>ENT_HTML401</b></td>
2227
   *                         <td>
2228
   *                         Handle code as HTML 4.01.
2229
   *                         </td>
2230
   *                         </tr>
2231
   *                         <tr valign="top">
2232
   *                         <td><b>ENT_XML1</b></td>
2233
   *                         <td>
2234
   *                         Handle code as XML 1.
2235
   *                         </td>
2236
   *                         </tr>
2237
   *                         <tr valign="top">
2238
   *                         <td><b>ENT_XHTML</b></td>
2239
   *                         <td>
2240
   *                         Handle code as XHTML.
2241
   *                         </td>
2242
   *                         </tr>
2243
   *                         <tr valign="top">
2244
   *                         <td><b>ENT_HTML5</b></td>
2245
   *                         <td>
2246
   *                         Handle code as HTML 5.
2247
   *                         </td>
2248
   *                         </tr>
2249
   *                         </table>
2250
   *                         </p>
2251 17
   * @param string $encoding [optional] <p>Encoding to use.</p>
2252
   *
2253
   * @return string <p>The decoded string.</p>
2254 17
   */
2255
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Quick exit: without "&", or without both "&#" and ";", there is nothing to decode.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    if ($encoding !== 'UTF-8' && $encoding !== 'WINDOWS-1252') {
      // The support table is filled lazily; make sure it exists before reading
      // the "mbstring" entry, as the other methods of this class do.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Repeat until a pass no longer changes the string, so that multiple-encoded
    // entities are decoded as well.
    do {
      $str_compare = $str;

      // Decimal numeric entities are converted first; quotes are left encoded here
      // so that $flags decides about them in html_entity_decode() below.
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric entities)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2330
2331
  /**
2332
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2333
   *
2334
   * @link http://php.net/manual/en/function.htmlentities.php
2335
   *
2336
   * @param string $str           <p>
2337
   *                              The input string.
2338
   *                              </p>
2339
   * @param int    $flags         [optional] <p>
2340
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2341
   *                              invalid code unit sequences and the used document type. The default is
2342
   *                              ENT_COMPAT | ENT_HTML401.
2343
   *                              <table>
2344
   *                              Available <i>flags</i> constants
2345
   *                              <tr valign="top">
2346
   *                              <td>Constant Name</td>
2347
   *                              <td>Description</td>
2348
   *                              </tr>
2349
   *                              <tr valign="top">
2350
   *                              <td><b>ENT_COMPAT</b></td>
2351
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2352
   *                              </tr>
2353
   *                              <tr valign="top">
2354
   *                              <td><b>ENT_QUOTES</b></td>
2355
   *                              <td>Will convert both double and single quotes.</td>
2356
   *                              </tr>
2357
   *                              <tr valign="top">
2358
   *                              <td><b>ENT_NOQUOTES</b></td>
2359
   *                              <td>Will leave both double and single quotes unconverted.</td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_IGNORE</b></td>
2363
   *                              <td>
2364
   *                              Silently discard invalid code unit sequences instead of returning
2365
   *                              an empty string. Using this flag is discouraged as it
2366
   *                              may have security implications.
2367
   *                              </td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2371
   *                              <td>
2372
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2373
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2374
   *                              </td>
2375
   *                              </tr>
2376
   *                              <tr valign="top">
2377
   *                              <td><b>ENT_DISALLOWED</b></td>
2378
   *                              <td>
2379
   *                              Replace invalid code points for the given document type with a
2380
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2381
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2382
   *                              instance, to ensure the well-formedness of XML documents with
2383
   *                              embedded external content.
2384
   *                              </td>
2385
   *                              </tr>
2386
   *                              <tr valign="top">
2387
   *                              <td><b>ENT_HTML401</b></td>
2388
   *                              <td>
2389
   *                              Handle code as HTML 4.01.
2390
   *                              </td>
2391
   *                              </tr>
2392
   *                              <tr valign="top">
2393
   *                              <td><b>ENT_XML1</b></td>
2394
   *                              <td>
2395
   *                              Handle code as XML 1.
2396
   *                              </td>
2397
   *                              </tr>
2398
   *                              <tr valign="top">
2399
   *                              <td><b>ENT_XHTML</b></td>
2400
   *                              <td>
2401
   *                              Handle code as XHTML.
2402
   *                              </td>
2403
   *                              </tr>
2404
   *                              <tr valign="top">
2405
   *                              <td><b>ENT_HTML5</b></td>
2406
   *                              <td>
2407
   *                              Handle code as HTML 5.
2408
   *                              </td>
2409
   *                              </tr>
2410
   *                              </table>
2411
   *                              </p>
2412
   * @param string $encoding      [optional] <p>
2413
   *                              Like <b>htmlspecialchars</b>,
2414
   *                              <b>htmlentities</b> takes an optional third argument
2415
   *                              <i>encoding</i> which defines encoding used in
2416
   *                              conversion.
2417
   *                              Although this argument is technically optional, you are highly
2418
   *                              encouraged to specify the correct value for your code.
2419
   *                              </p>
2420
   * @param bool   $double_encode [optional] <p>
2421
   *                              When <i>double_encode</i> is turned off PHP will not
2422
   *                              encode existing html entities. The default is to convert everything.
2423
   *                              </p>
2424
   *
2425
   *
2426
   * @return string the encoded string.
2427
   * </p>
2428
   * <p>
2429
   * If the input <i>string</i> contains an invalid code unit
2430 2
   * sequence within the given <i>encoding</i> an empty string
2431
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2432 2
   * <b>ENT_SUBSTITUTE</b> flags are set.
2433 1
   */
2434 1
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    /**
     * The native htmlentities() leaves backslashes alone, so each backslash is
     * additionally replaced by its numeric HTML entity.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The extra pass below is only applied to UTF-8 output.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes and map it to
    // its numbered entity.
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($encoded) as $position => $size) {
      if ($size < 3) {
        continue;
      }

      $char = self::access($encoded, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $encoded);
  }
2472
2473
  /**
2474
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2475
   *
2476
   * INFO: Take a look at "UTF8::htmlentities()"
2477
   *
2478
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2479
   *
2480
   * @param string $str           <p>
2481
   *                              The string being converted.
2482
   *                              </p>
2483
   * @param int    $flags         [optional] <p>
2484
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2485
   *                              invalid code unit sequences and the used document type. The default is
2486
   *                              ENT_COMPAT | ENT_HTML401.
2487
   *                              <table>
2488
   *                              Available <i>flags</i> constants
2489
   *                              <tr valign="top">
2490
   *                              <td>Constant Name</td>
2491
   *                              <td>Description</td>
2492
   *                              </tr>
2493
   *                              <tr valign="top">
2494
   *                              <td><b>ENT_COMPAT</b></td>
2495
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2496
   *                              </tr>
2497
   *                              <tr valign="top">
2498
   *                              <td><b>ENT_QUOTES</b></td>
2499
   *                              <td>Will convert both double and single quotes.</td>
2500
   *                              </tr>
2501
   *                              <tr valign="top">
2502
   *                              <td><b>ENT_NOQUOTES</b></td>
2503
   *                              <td>Will leave both double and single quotes unconverted.</td>
2504
   *                              </tr>
2505
   *                              <tr valign="top">
2506
   *                              <td><b>ENT_IGNORE</b></td>
2507
   *                              <td>
2508
   *                              Silently discard invalid code unit sequences instead of returning
2509
   *                              an empty string. Using this flag is discouraged as it
2510
   *                              may have security implications.
2511
   *                              </td>
2512
   *                              </tr>
2513
   *                              <tr valign="top">
2514
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2515
   *                              <td>
2516
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2517
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2518
   *                              </td>
2519
   *                              </tr>
2520
   *                              <tr valign="top">
2521
   *                              <td><b>ENT_DISALLOWED</b></td>
2522
   *                              <td>
2523
   *                              Replace invalid code points for the given document type with a
2524
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2525
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2526
   *                              instance, to ensure the well-formedness of XML documents with
2527
   *                              embedded external content.
2528
   *                              </td>
2529
   *                              </tr>
2530
   *                              <tr valign="top">
2531
   *                              <td><b>ENT_HTML401</b></td>
2532
   *                              <td>
2533
   *                              Handle code as HTML 4.01.
2534
   *                              </td>
2535
   *                              </tr>
2536
   *                              <tr valign="top">
2537
   *                              <td><b>ENT_XML1</b></td>
2538
   *                              <td>
2539
   *                              Handle code as XML 1.
2540
   *                              </td>
2541
   *                              </tr>
2542
   *                              <tr valign="top">
2543
   *                              <td><b>ENT_XHTML</b></td>
2544
   *                              <td>
2545
   *                              Handle code as XHTML.
2546
   *                              </td>
2547
   *                              </tr>
2548
   *                              <tr valign="top">
2549
   *                              <td><b>ENT_HTML5</b></td>
2550
   *                              <td>
2551
   *                              Handle code as HTML 5.
2552
   *                              </td>
2553
   *                              </tr>
2554
   *                              </table>
2555
   *                              </p>
2556
   * @param string $encoding      [optional] <p>
2557
   *                              Defines encoding used in conversion.
2558
   *                              </p>
2559
   *                              <p>
2560
   *                              For the purposes of this function, the encodings
2561
   *                              ISO-8859-1, ISO-8859-15,
2562
   *                              UTF-8, cp866,
2563
   *                              cp1251, cp1252, and
2564
   *                              KOI8-R are effectively equivalent, provided the
2565
   *                              <i>string</i> itself is valid for the encoding, as
2566
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2567
   *                              the same positions in all of these encodings.
2568
   *                              </p>
2569
   * @param bool   $double_encode [optional] <p>
2570
   *                              When <i>double_encode</i> is turned off PHP will not
2571
   *                              encode existing html entities, the default is to convert everything.
2572
   *                              </p>
2573
   *
2574
   * @return string The converted string.
2575
   * </p>
2576
   * <p>
2577
   * If the input <i>string</i> contains an invalid code unit
2578 1
   * sequence within the given <i>encoding</i> an empty string
2579
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2580 1
   * <b>ENT_SUBSTITUTE</b> flags are set.
2581 1
   */
2582 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The default 'UTF-8' is handed to PHP untouched; any other value is first
    // run through normalize_encoding() with 'UTF-8' as its second argument.
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2590
2591
  /**
2592 1
   * Checks whether iconv is available on the server.
2593
   *
2594 1
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2595
   */
2596
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // On PHP < 5.6 the iconv encodings are forced to UTF-8. The function_exists()
    // guard prevents a fatal "call to undefined function" error when the iconv
    // extension (and therefore iconv_set_encoding()) is not available at all.
    if (
        Bootup::is_php('5.6') === false
        &&
        function_exists('iconv_set_encoding') === true
    ) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2611
2612
  /**
2613
   * alias for "UTF8::decimal_to_chr()"
2614
   *
2615
   * @see UTF8::decimal_to_chr()
2616
   *
2617 2
   * @param mixed $int
2618
   *
2619 2
   * @return string
2620
   */
2621
  public static function int_to_chr($int)
  {
    // Pure alias: the value is passed through unchanged to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2625
2626
  /**
2627
   * Converts Integer to hexadecimal U+xxxx code point representation.
2628
   *
2629
   * INFO: opposite to UTF8::hex_to_int()
2630
   *
2631
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2632 3
   * @param string $pfix [optional]
2633
   *
2634 3
   * @return string <p>The code point, or empty string on failure.</p>
2635 3
   */
2636
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a genuine integer is answered with the empty string.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits; longer values stay as they are.
    $digits = str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2648
2649
  /**
2650 1
   * Checks whether intl-char is available on the server.
2651
   *
2652
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2653 1
   */
2654 1
  public static function intlChar_loaded()
  {
    // Both conditions must hold: Bootup::is_php('7.0') reports true and the
    // "IntlChar" class exists. The class lookup is skipped when the first fails.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2662
2663
  /**
2664 4
   * Checks whether intl is available on the server.
2665
   *
2666 4
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2667
   */
2668
  public static function intl_loaded()
  {
    // extension_loaded() already answers with a boolean.
    $loaded = extension_loaded('intl');

    return $loaded;
  }
2672
2673
  /**
2674
   * alias for "UTF8::is_ascii()"
2675
   *
2676
   * @see UTF8::is_ascii()
2677
   *
2678
   * @param string $str
2679
   *
2680
   * @return boolean
2681
   *
2682
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2683
   */
2684
  public static function isAscii($str)
  {
    // Deprecated camelCase alias; delegates unchanged to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2688
2689
  /**
2690
   * alias for "UTF8::is_base64()"
2691
   *
2692
   * @see UTF8::is_base64()
2693
   *
2694
   * @param string $str
2695
   *
2696
   * @return bool
2697
   *
2698
   * @deprecated <p>use "UTF8::is_base64()"</p>
2699
   */
2700
  public static function isBase64($str)
  {
    // Deprecated camelCase alias; delegates unchanged to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2704
2705
  /**
2706
   * alias for "UTF8::is_binary()"
2707
   *
2708
   * @see UTF8::is_binary()
2709
   *
2710
   * @param string $str
2711
   *
2712
   * @return bool
2713
   *
2714
   * @deprecated <p>use "UTF8::is_binary()"</p>
2715
   */
2716
  public static function isBinary($str)
  {
    // Deprecated camelCase alias; delegates unchanged to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2720
2721
  /**
2722
   * alias for "UTF8::is_bom()"
2723
   *
2724
   * @see UTF8::is_bom()
2725
   *
2726
   * @param string $utf8_chr
2727
   *
2728
   * @return boolean
2729
   *
2730
   * @deprecated <p>use "UTF8::is_bom()"</p>
2731
   */
2732
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias; delegates unchanged to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2736
2737
  /**
2738
   * alias for "UTF8::is_html()"
2739
   *
2740
   * @see UTF8::is_html()
2741
   *
2742
   * @param string $str
2743
   *
2744
   * @return boolean
2745
   *
2746
   * @deprecated <p>use "UTF8::is_html()"</p>
2747
   */
2748
  public static function isHtml($str)
  {
    // Deprecated camelCase alias; delegates unchanged to is_html().
    $result = self::is_html($str);

    return $result;
  }
2752
2753
  /**
2754
   * alias for "UTF8::is_json()"
2755
   *
2756
   * @see UTF8::is_json()
2757
   *
2758
   * @param string $str
2759
   *
2760
   * @return bool
2761
   *
2762
   * @deprecated <p>use "UTF8::is_json()"</p>
2763
   */
2764
  public static function isJson($str)
  {
    // Deprecated camelCase alias; delegates unchanged to is_json().
    $result = self::is_json($str);

    return $result;
  }
2768
2769
  /**
2770
   * alias for "UTF8::is_utf16()"
2771
   *
2772
   * @see UTF8::is_utf16()
2773
   *
2774
   * @param string $str
2775
   *
2776
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2777
   *
2778
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2779
   */
2780
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias; delegates unchanged to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2784
2785
  /**
2786
   * alias for "UTF8::is_utf32()"
2787
   *
2788
   * @see UTF8::is_utf32()
2789
   *
2790
   * @param string $str
2791
   *
2792
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2793
   *
2794
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2795
   */
2796
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias; delegates unchanged to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2800
2801
  /**
2802
   * alias for "UTF8::is_utf8()"
2803
   *
2804
   * @see UTF8::is_utf8()
2805
   *
2806
   * @param string $str
2807
   * @param bool   $strict
2808
   *
2809
   * @return bool
2810
   *
2811
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2812
   */
2813
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias; both arguments are forwarded unchanged to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2817
2818
  /**
2819
   * Checks if a string is 7 bit ASCII.
2820
   *
2821
   * @param string $str <p>The string to check.</p>
2822
   *
2823
   * @return bool <p>
2824 52
   *              <strong>true</strong> if it is ASCII<br>
2825
   *              <strong>false</strong> otherwise
2826 52
   *              </p>
2827
   */
2828 52
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string counts as ASCII. Otherwise the string passes when no byte
    // outside the accepted set (tab, LF, CR, 0x10, 0x13 and 0x20-0x7E) is found.
    // NOTE(review): \x10 and \x13 are DLE / DC3 - possibly meant as decimal 10 / 13
    // (already covered by \x0A / \x0D); confirm before changing.
    return $str === '' || !preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);
  }
2838
2839
  /**
2840
   * Returns true if the string is base64 encoded, false otherwise.
2841
   *
2842 1
   * @param string $str <p>The input string.</p>
2843
   *
2844 1
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2845
   */
2846 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Strict mode makes base64_decode() return false for characters outside
    // the base64 alphabet.
    $decoded = base64_decode($str, true);

    // Compare against false explicitly: a plain truthiness check would reject
    // valid input whose decoded payload is the string "0" (e.g. 'MA==').
    if ($decoded === false) {
      return false;
    }

    // Only accept the canonical form, i.e. re-encoding must reproduce the input.
    return base64_encode($decoded) === $str;
  }
2861
2862
  /**
2863
   * Check if the input is binary... (this looks like a hack).
2864
   *
2865 17
   * @param mixed $input
2866
   *
2867 17
   * @return bool
2868
   */
2869 17
  public static function is_binary($input)
  {
    $input = (string)$input;

    if ($input === '') {
      return false;
    }

    // A string consisting solely of "0" / "1" characters is treated as a bit string.
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30% NUL bytes"
    // test was dropped: every input it accepted contains at least one NUL byte and
    // is therefore accepted by this check as well.
    return substr_count($input, "\x00") > 0;
  }
2892
2893
  /**
2894
   * Check if the file is binary.
2895
   *
2896
   * @param string $file
2897
   *
2898
   * @return boolean
2899
   */
2900
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false (plus a warning), it does not
      // throw on its own - so the handle must be checked before it is used.
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        // Only the first 512 bytes are inspected.
        $block = fread($fp, 512);
        fclose($fp);
        $fp = false;
      }
    } catch (\Exception $e) {
      // Reached e.g. when an error handler converts warnings into exceptions.
      $block = '';

      // Do not leak the handle if reading failed after a successful open.
      if (is_resource($fp)) {
        fclose($fp);
      }
    }

    // A failed fread() yields false, which is_binary() casts to '' (=> false).
    return self::is_binary($block);
  }
2912
2913
  /**
2914
   * Checks if the given string is equal to any "Byte Order Mark".
2915
   *
2916
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2917
   *
2918 1
   * @param string $str <p>The input string.</p>
2919
   *
2920 1
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2921 1
   */
2922 1
  public static function is_bom($str)
  {
    // The candidate strings are the keys of self::$BOM; the input has to be
    // strictly identical to one of them.
    return in_array($str, array_keys(self::$BOM), true);
  }
2932
2933
  /**
2934
   * Check if the string contains any html-tags <lall>.
2935
   *
2936 1
   * @param string $str <p>The input string.</p>
2937
   *
2938 1
   * @return boolean
2939
   */
2940 1
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Matches one opening, closing or self-closing tag, optionally with attributes.
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    $found = array();
    preg_match($tagPattern, $str, $found);

    // An empty match list means no tag was found.
    return count($found) !== 0;
  }
2959
2960
  /**
2961
   * Try to check if "$str" is an json-string.
2962
   *
2963 1
   * @param string $str <p>The input string.</p>
2964
   *
2965 1
   * @return bool
2966
   */
2967 1
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; scalars and null are rejected
    // before the decoder's error state is even consulted.
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    return json_last_error() === JSON_ERROR_NONE;
  }
2991
2992
  /**
2993
   * Check if the string is UTF-16.
2994
   *
2995
   * @param string $str <p>The input string.</p>
2996
   *
2997
   * @return int|false <p>
2998
   *                   <strong>false</strong> if it's not UTF-16,<br>
2999 5
   *                   <strong>1</strong> for UTF-16LE,<br>
3000
   *                   <strong>2</strong> for UTF-16BE.
3001 5
   *                   </p>
3002
   */
3003 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() classifies as binary is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One score per byte order, little endian first.
    $scoreLE = 0;
    $scoreBE = 0;

    foreach (array('UTF-16LE', 'UTF-16BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the decoded text.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Count the keys of count_chars($roundTrip) that are found (strictly)
      // among the values of count_chars($str).
      $hits = 0;
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $hits++;
        }
      }

      if ($candidate === 'UTF-16LE') {
        $scoreLE = $hits;
      } else {
        $scoreBE = $hits;
      }
    }

    // A tie (including 0:0) means "not UTF-16".
    if ($scoreLE === $scoreBE) {
      return false;
    }

    return ($scoreLE > $scoreBE) ? 1 : 2;
  }
3051
3052
  /**
3053
   * Check if the string is UTF-32.
3054
   *
3055
   * @param string $str
3056
   *
3057
   * @return int|false <p>
3058
   *                   <strong>false</strong> if it's not UTF-32,<br>
3059 2
   *                   <strong>1</strong> for UTF-32LE,<br>
3060
   *                   <strong>2</strong> for UTF-32BE.
3061 2
   *                   </p>
3062
   */
3063 2 View Code Duplication
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() classifies as binary is examined further.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // One score per byte order, little endian first.
    $scoreLE = 0;
    $scoreBE = 0;

    foreach (array('UTF-32LE', 'UTF-32BE') as $candidate) {
      $decoded = \mb_convert_encoding($str, 'UTF-8', $candidate);
      if (!$decoded) {
        continue;
      }

      // Round trip: UTF-8 -> candidate -> UTF-8 has to reproduce the decoded text.
      $reEncoded = \mb_convert_encoding($decoded, $candidate, 'UTF-8');
      $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $candidate);
      if ($roundTrip !== $decoded) {
        continue;
      }

      // Count the keys of count_chars($roundTrip) that are found (strictly)
      // among the values of count_chars($str).
      $hits = 0;
      $strChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $strChars, true) === true) {
          $hits++;
        }
      }

      if ($candidate === 'UTF-32LE') {
        $scoreLE = $hits;
      } else {
        $scoreBE = $hits;
      }
    }

    // A tie (including 0:0) means "not UTF-32".
    if ($scoreLE === $scoreBE) {
      return false;
    }

    return ($scoreLE > $scoreBE) ? 1 : 2;
  }
3111
3112
  /**
3113
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3114
   *
3115
   * @see    http://hsivonen.iki.fi/php-utf8/
3116
   *
3117
   * @param string $str    <p>The string to be checked.</p>
3118 60
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3119
   *
3120 60
   * @return bool
3121
   */
3122 60
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is considered valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: input that is detected as UTF-16 / UTF-32 is rejected up front.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): the /u shortcut is taken when pcre_utf8_support() is NOT true,
    // which looks inverted - confirm against pcre_utf8_support() before changing.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // number of continuation octets still expected for the current sequence
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets of the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte length, even when mbstring overloads strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a character: either US-ASCII or the lead octet of a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } else {
          // Neither US-ASCII nor a legal lead octet. This also covers the lead
          // octets of 5 and 6 octet sequences (0xF8-0xFD): such sequences are
          // always illegal (non-shortest form or beyond U+10FFFF), so they are
          // rejected right away instead of being parsed to their end first.
          return false;
        }

        continue;
      }

      // Inside a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $mState--;
      $mUcs4 |= ($in & 0x0000003F) << ($mState * 6);

      if ($mState !== 0) {
        continue;
      }

      // End of the multi-octet sequence: $mUcs4 now holds the complete code
      // point, check for illegal sequences and code points.
      if (
          // From Unicode 3.1, non-shortest form is illegal
          (2 === $mBytes && $mUcs4 < 0x0080) ||
          (3 === $mBytes && $mUcs4 < 0x0800) ||
          (4 === $mBytes && $mUcs4 < 0x10000) ||
          // From Unicode 3.2, surrogate characters are illegal.
          (($mUcs4 & 0xFFFFF800) === 0xD800) ||
          // Code points outside the Unicode range are illegal.
          ($mUcs4 > 0x10FFFF)
      ) {
        return false;
      }

      // Reset for the next character.
      $mUcs4 = 0;
      $mBytes = 1;
    }

    // A sequence that is still open at the end of the string is truncated
    // (e.g. a lone "\xC3") and therefore not valid UTF-8.
    return $mState === 0;
  }
3263
3264
  /**
3265
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3266
   * Decodes a JSON string
3267
   *
3268
   * @link http://php.net/manual/en/function.json-decode.php
3269
   *
3270
   * @param string $json    <p>
3271
   *                        The <i>json</i> string being decoded.
3272
   *                        </p>
3273
   *                        <p>
3274
   *                        This function only works with UTF-8 encoded strings.
3275
   *                        </p>
3276
   *                        <p>PHP implements a superset of
3277
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3278
   *                        only supports these values when they are nested inside an array or an object.
3279
   *                        </p>
3280
   * @param bool   $assoc   [optional] <p>
3281
   *                        When <b>TRUE</b>, returned objects will be converted into
3282
   *                        associative arrays.
3283
   *                        </p>
3284
   * @param int    $depth   [optional] <p>
3285
   *                        User specified recursion depth.
3286
   *                        </p>
3287
   * @param int    $options [optional] <p>
3288
   *                        Bitmask of JSON decode options. Currently only
3289
   *                        <b>JSON_BIGINT_AS_STRING</b>
3290
   *                        is supported (default is to cast large integers as floats)
3291
   *                        </p>
3292
   *
3293
   * @return mixed the value encoded in <i>json</i> in appropriate
3294
   * PHP type. Values true, false and
3295
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3296 2
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3297
   * <i>json</i> cannot be decoded or if the encoded
3298 2
   * data is deeper than the recursion limit.
3299
   */
3300 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // The input is passed through self::filter() and cast to string before decoding.
    $json = (string)self::filter($json);

    // "$options" is only forwarded when Bootup::is_php('5.4') reports true.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
3312
3313
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" first and is then handed to the
   * native json_encode().
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value being encoded; it is filtered before encoding.</p>
   * @param int   $options [optional] <p>Bitmask of JSON_* constants, forwarded unchanged to the native function.</p>
   * @param int   $depth   [optional] <p>
   *                       Maximum nesting depth. It is only forwarded when
   *                       "Bootup::is_php('5.5')" reports true.
   *                       </p>
   *
   * @return string|false <p>The result of the native json_encode() call.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    // without a positive 5.5 version check the "$depth" argument is not passed on
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3361
3362
  /**
   * Lowercase the first character of a string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Charset, forwarded to substr() and strtolower().</p>
   * @param boolean $cleanUtf8 [optional] <p>Forwarded to substr() and strtolower().</p>
   *
   * @return string <p>The lowercased first character followed by the rest of the string.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character; a "false" result counts as an empty rest
    $rest = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $rest = ($rest === false) ? '' : $rest;

    $firstChar = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);

    return self::strtolower($firstChar, $encoding, $cleanUtf8) . $rest;
  }
3386
3387
  /**
   * Alias of "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3402
3403
  /**
   * Lowercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", each non-empty part that is not
   * listed in $exceptions is passed through "UTF8::lcfirst()", and the parts are
   * glued back together without a separator.
   *
   * @param string   $str        <p>The input string. A falsy value yields an empty string.</p>
   * @param string[] $exceptions [optional] <p>Words that are kept as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Forwarded to "UTF8::str_to_words()".</p>
   * @param string   $encoding   [optional] <p>Charset, forwarded to "UTF8::lcfirst()".</p>
   * @param boolean  $cleanUtf8  [optional] <p>Forwarded to "UTF8::lcfirst()".</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if (!$str) {
      return '';
    }

    $parts = self::str_to_words($str, $charlist);
    $result = array();
    $hasExceptions = count($exceptions) > 0;

    foreach ($parts as $part) {
      // falsy parts are dropped entirely
      if (!$part) {
        continue;
      }

      $isException = $hasExceptions && in_array($part, $exceptions, true);

      $result[] = $isException ? $part : self::lcfirst($part, $encoding, $cleanUtf8);
    }

    return implode('', $result);
  }
3452
3453
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without a character list, every leading character from the Unicode
   * categories "Z" (separators) and "C" (other, e.g. control chars) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>
   *                      Characters to be stripped. The list "0" is honoured as a
   *                      character list; INF or any other empty value selects the default.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // an integer zero is handled like the character list "0"
    if ($chars === 0) {
      $chars = '0';
    }

    // "0" is falsy in PHP but it is a perfectly valid character to strip,
    // so it must not fall into the default (whitespace) branch
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3476
3477
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated first).</p>
   *
   * @return string <p>
   *                The character with the highest code point,
   *                or an empty string if the input contains no code points.
   *                </p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native max() must not see an empty list:
    // it emits a warning before PHP 8 and throws a ValueError since PHP 8
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3492
3493
  /**
   * Determine the largest entry of "UTF8::chr_size_list()" for the given string,
   * i.e. the maximum number of bytes taken by any of its characters.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3510
3511
  /**
   * Checks whether the "mbstring" extension is loaded.
   *
   * Side effect: when the extension is available, the internal mbstring
   * encoding is switched to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (!extension_loaded('mbstring')) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3526
3527
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings (the array is concatenated first).</p>
   *
   * @return string <p>
   *                The character with the lowest code point,
   *                or an empty string if the input contains no code points.
   *                </p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native min() must not see an empty list:
    // it emits a warning before PHP 8 and throws a ValueError since PHP 8
    if (empty($codePoints)) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3542
3543
  /**
   * Alias of "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3559
3560
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input (fallback or "UTF-8"), exact "UTF-8", exact member of
   * self::$ICONV_ENCODING, per-request cache, and finally the alias table below.
   * An unknown name is returned upper-cased.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8; returned as-is when $encoding is empty and the fallback is truthy.</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    $encoding = trim((string)$encoding);

    // nothing given: use the fallback if there is one, "UTF-8" otherwise
    if (!$encoding) {
      return $fallback ? $fallback : 'UTF-8';
    }

    // already in canonical form
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    $normalized = strtoupper($encoding);

    // alias key: only upper-case letters, digits and whitespace survive
    $aliasKey = preg_replace('/[^A-Z0-9\s]/', '', $normalized);

    $aliases = array(
        // ISO-8859-1: Western European
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        // ISO-8859-2: Central European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2',
        // ISO-8859-3: Southern European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3',
        // ISO-8859-4: Northern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4',
        // ISO-8859-5: Cyrillic
        'ISO88595'    => 'ISO-8859-5',
        // ISO-8859-6: Arabic
        'ISO88596'    => 'ISO-8859-6',
        // ISO-8859-7: Greek
        'ISO88597'    => 'ISO-8859-7',
        // ISO-8859-8: Hebrew
        'ISO88598'    => 'ISO-8859-8',
        // ISO-8859-9: Turkish
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9',
        // ISO-8859-11: Thai
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11',
        // ISO-8859-10: Nordic
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10',
        // ISO-8859-13: Baltic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13',
        // ISO-8859-14: Celtic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14',
        // ISO-8859-15: Western European (with some extra chars e.g. €)
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15',
        // ISO-8859-16: Southeast European
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16',
        // Windows code pages
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        // Unicode transformation formats
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        // raw bytes
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!empty($aliases[$aliasKey])) {
      $normalized = $aliases[$aliasKey];
    }

    // remember the result under the trimmed, not yet upper-cased name
    $STATIC_NORMALIZE_ENCODING_CACHE[$encoding] = $normalized;

    return $normalized;
  }
3673
3674
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by the
   * corresponding value of that table.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    // search / replace lists are extracted from the table only once per request
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    if ($UTF8_MSWORD_KEYS_CACHE === null) {
      $UTF8_MSWORD_KEYS_CACHE = array_keys(self::$UTF8_MSWORD);
      $UTF8_MSWORD_VALUES_CACHE = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $input);
  }
3699
3700
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a
   * plain space. Unless $keepBidiUnicodeControls is set, the entries of
   * self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed beforehand.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>
   *                                        Set to true (strict boolean), to keep non-breaking-spaces.
   *                                        </p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // Only a strict "true" keeps the non-breaking space. The cache key must follow
    // the very same rule, otherwise a truthy non-boolean argument (e.g. 1) would store
    // the full table in the slot that a later call with "true" reads from.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp) {
        /** @noinspection OffsetOperationsInspection */
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3745
3746
  /**
   * Remove every run of characters matched by the "[[:space:]]" class
   * (pattern compiled with the "u" modifier) from the string.
   *
   * @param string $str
   *
   * @return string <p>The string without whitespace; an empty string for empty input.</p>
   */
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    return isset($input[0]) ? (string)preg_replace('/[[:space:]]+/u', '', $input) : '';
  }
3765
3766
  /**
   * Format a number with grouped thousands.
   *
   * The native number_format() handles separators longer than one byte since
   * PHP 5.4. On older versions the number is formatted with the one-byte
   * separators "." / "," first and these are swapped for the requested
   * separators afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    // the workaround is needed as soon as ONE of the separators is longer than a single byte ...
    $hasLongSeparator = isset($thousands_sep[1]) || isset($dec_point[1]);

    // ... and only when the 5.4 version check does NOT succeed
    if ($hasLongSeparator && Bootup::is_php('5.4') !== true) {
      // strtr() with an array replaces in one pass, so a "," or "." inside
      // an already inserted separator is never replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3804
3805
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Results are cached per request, keyed by the (UTF-8 converted) input.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>
   *                              Default is UTF-8; for any other encoding the input is converted to
   *                              UTF-8 first.
   *                              </p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 for an empty input,<br>
   *             the value of the first byte if no multi-byte sequence could be decoded.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    static $CHAR_CACHE = array();

    $encoding = (string)$encoding;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // the name may have been an alias of UTF-8, so check again before converting
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);

      // a falsy result falls through to the manual decoding below
      if ($intlCode) {
        $CHAR_CACHE[$chr] = $intlCode;

        return $intlCode;
      }
    }

    // 1-based list of (at most) the first four bytes of the input
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // 4-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // 3-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // 2-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing that could be decoded)
      $code = $lead;
    }

    $CHAR_CACHE[$chr] = $code;

    return $code;
  }
3871
3872
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()" this method never (re-)places variables
   *          in the current scope, the result is always written to $result.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through "UTF8::clean()" before parsing.</p>
   *
   * @return bool <p>
   *              <strong>false</strong> if parsing failed or $result is empty afterwards,
   *              <strong>true</strong> otherwise.
   *              </p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3900
3901
  /**
   * Checks if the "/u" pattern modifier, which enables Unicode support in PCRE, is usable.
   *
   * The check runs an empty pattern with that modifier against an empty string;
   * warnings are suppressed.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3911
3912
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: decimal digits are used as a number,
   * other hexadecimal digits go through "UTF8::hex_to_int()", and anything else
   * through "UTF8::ord()". A boundary that is falsy or resolves to a falsy code
   * point yields an empty array.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    // resolve the start first, then the end
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($bounds[0], $bounds[1]));
  }
3958
3959
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string stops changing).</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // turn "%uXXXX" escapes into hexadecimal numeric entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
4009
4010
  /**
   * Alias of "UTF8::remove_bom()": the argument is forwarded unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4025
4026
  /**
   * Remove every byte-order-mark listed in self::$BOM from the start of the string.
   *
   * The table is walked in its own order; each mark found at byte offset 0
   * is cut off before the next one is checked.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // only a mark at the very first byte counts
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
4053
4054
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated occurrences of a needle is collapsed
   * into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>
   *                              String(s) to search for in the base string; any other
   *                              type leaves the base string untouched.
   *                              </p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) !== true) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($what as $item) {
      // The replacement is the captured group and not the raw needle: a needle like '$2'
      // or '\\0' would otherwise be interpreted as a back-reference by preg_replace().
      $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
    }

    return $str;
  }
4077
4078
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Removed are all control characters except newline (dec 10), carriage return (dec 13)
   * and horizontal tab (dec 09). The replacement is repeated until nothing matches
   * anymore, so sequences that only appear after a removal are caught as well.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str
   * @param bool   $url_encoded <p>Also remove the url-encoded form ("%00" ...) of these characters.</p>
   * @param string $replacement <p>The text that is inserted instead of the removed characters.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    if ($url_encoded) {
      // percent-encoding is case-insensitive ("%0b" === "%0B"), hence the "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4111
4112
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement (an empty string removes the characters).</p>
   * @param bool   $processInvalidUtf8 <p>
   *                                   Also handle invalid UTF-8 byte sequences: they are removed when
   *                                   the replacement is empty, otherwise they are turned into U+FFFD
   *                                   first and then replaced like every other U+FFFD.
   *                                   </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      // mb_substitute_character() only accepts a code point or a keyword, never an arbitrary
      // string: substitute with U+FFFD and let the str_replace() below insert the real replacement
      $substitute = ($replacementChar === '') ? 'none' : 0xFFFD;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the substitute character is a global mbstring setting: save it and restore it afterwards
      $save = \mb_substitute_character();
      \mb_substitute_character($substitute);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4157
4158
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * Without a character list, every trailing character from the Unicode
   * categories "Z" (separators) and "C" (other, e.g. control chars) is removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>
   *                      Characters to be stripped. The list "0" is honoured as a
   *                      character list; INF or any other empty value selects the default.
   *                      </p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // an integer zero is handled like the character list "0"
    if ($chars === 0) {
      $chars = '0';
    }

    // "0" is falsy in PHP but it is a perfectly valid character to strip,
    // so it must not fall into the default (whitespace) branch
    if ($chars === INF || ($chars !== '0' && !$chars)) {
      // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4181
4182
  /**
   * Build a regex fragment (for patterns delimited by "/") that matches any single
   * character of the given list.
   *
   * Characters of up to two bytes are escaped via preg_quote() and collected in one
   * bracket expression, a "-" is moved to its front. Longer items are added to the
   * bracket expression when "UTF8::strlen()" reports a length of 1, everything
   * else becomes an alternative next to the bracket expression.
   * Results are cached per request.
   *
   * @param string $s     <p>The list of characters.</p>
   * @param string $class <p>Initial content of the bracket expression.</p>
   *
   * @return string
   */
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    $bracket = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        // in front of everything else, so the dash cannot form a range
        $bracket = '-' . $bracket;
        continue;
      }

      if (!isset($char[2])) {
        $bracket .= preg_quote($char, '/');
        continue;
      }

      if (1 === self::strlen($char)) {
        $bracket .= $char;
        continue;
      }

      $alternatives[] = $char;
    }

    if ($bracket) {
      $bracket = '[' . $bracket . ']';
    }

    if (count($alternatives) === 0) {
      $return = $bracket;
    } else {
      $return = '(?:' . implode('|', array_merge(array($bracket), $alternatives)) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4230
4231
  /**
   * WARNING: Prints every entry of the internal support table (one per line, followed by
   * "<br>"), e.g. for debugging.
   */
  public static function showSupport()
  {
    // make sure the support table has been filled before dumping it
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $supportValue) {
      $line = $supportValue . "\n<br>";
      echo $line;
    }
  }
4244
4245
  /**
   * Converts a UTF-8 character to an HTML numbered entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // ASCII input is passed through untouched when the caller asked for it
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    $targetEncoding = $encoding;
    if ($targetEncoding !== 'UTF-8') {
      $targetEncoding = self::normalize_encoding($targetEncoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $targetEncoding);

    return '&#' . $codePoint . ';';
  }
4276
4277
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE UTF-8 support the string is split via a regular expression; otherwise a byte-wise
   * fallback walks the string and silently skips bytes that do not form a well-formed
   * 1- to 4-byte sequence.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: decode the byte stream by hand

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "strlen()" may be overloaded by mbstring, so ask for the byte count explicitly
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteCount = \mb_strlen($str, '8BIT');
      } else {
        $byteCount = strlen($str);
      }

      $pos = 0;
      while ($pos < $byteCount) {
        $lead = ord($str[$pos]);

        // expected sequence width according to the lead byte (0 = not a valid lead byte)
        if (($lead & 0x80) === 0x00) {
          $width = 1;
        } elseif (($lead & 0xE0) === 0xC0) {
          $width = 2;
        } elseif (($lead & 0xF0) === 0xE0) {
          $width = 3;
        } elseif (($lead & 0xF8) === 0xF0) {
          $width = 4;
        } else {
          $width = 0;
        }

        $char = '';
        if ($width > 0 && $pos + $width <= $byteCount) {
          $char = $str[$pos];
          for ($k = 1; $k < $width; $k++) {
            $next = $str[$pos + $k];
            if ((ord($next) & 0xC0) !== 0x80) {
              // broken continuation byte: drop the lead byte only
              $char = '';
              break;
            }
            $char .= $next;
          }
        }

        if ($char !== '') {
          $chars[] = $char;
          $pos += $width;
        } else {
          $pos++;
        }
      }
    }

    if ($length > 1) {
      // glue "$length" characters together per element
      $joined = array();
      foreach (array_chunk($chars, $length) as $chunk) {
        $joined[] = implode('', $chunk);
      }

      return $joined;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4400
4401
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary UTF-16 / UTF-32, pure ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list, and finally an "iconv()" round trip
   * for every entry of the internal iconv encoding list.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // run each detector only once and reuse its result for both byte orders
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // an encoding is accepted when converting the string onto itself leaves it unchanged
    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4501
4502
  /**
   * Check if the string ends with the given substring.
   *
   * An empty haystack or an empty needle always yields <strong>false</strong>.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // take as many trailing characters as the needle is long
    $needleLength = self::strlen($needle);
    $tail = self::substr($haystack, -$needleLength);

    return $tail !== false && $tail === $needle;
  }
4530
4531
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields <strong>false</strong>.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // "substr()" can fail and return false; do not hand that on to "strcasecmp()",
    // which expects strings (same handling as in "str_ends_with()").
    $haystackSub = self::substr($haystack, -self::strlen($needle));
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4554
4555
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is turned into a quoted, case-insensitive UTF-8 pattern and applied via
   * "preg_replace()". The replacement is taken literally: "$" and "\" have no special meaning.
   * An empty search term never matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // a pattern that can never match, so an empty needle is a no-op
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // "preg_replace()" interprets "$n" and "\n" in the replacement as back-references;
    // escape them so that the replacement is inserted literally, like "str_ireplace()" does.
    $escape = function ($value) {
      return addcslashes((string)$value, '\\$');
    };
    if (is_array($replace)) {
      $replacement = array_map($escape, $replace);
    } else {
      $replacement = $escape($replace);
    }

    $replacedCount = 0;
    $subject = preg_replace($patterns, $replacement, $subject, -1, $replacedCount);
    $count = $replacedCount; // used as reference parameter

    return $subject;
  }
4598
4599
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * An empty haystack or an empty needle always yields <strong>false</strong>.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix exactly when its first (case-insensitive) occurrence is at offset 0
    $firstHit = self::stripos($haystack, $needle);

    return $firstHit === 0;
  }
4622
4623
  /**
   * Limit the number of characters in a string, cutting at a word boundary (space) where possible.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   [optional] <p>Maximum number of characters to keep.</p>
   * @param string $strAddOn [optional] <p>Suffix appended whenever the string was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough already: nothing to cut, no add-on
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly behind a word: drop the space and stop there
    $charAtLimit = self::substr($str, $length - 1, 1);
    if ($charAtLimit === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // discard the last (possibly cut) word
    $words = explode(' ', $truncated);
    $completeWords = array_slice($words, 0, -1);
    $head = implode(' ', $completeWords);

    if ($head !== '') {
      return $head . $strAddOn;
    }

    // no complete word left: fall back to a hard cut
    return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
  }
4663
4664
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when "$pad_length" is not an integer, is not positive,
   * is smaller than the length of "$str", or when "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);
    if ($ps_length < 1) {
      // nothing to pad with (and it would be a division by zero below)
      return $str;
    }

    $diff = $pad_length - $str_length;

    // how many characters go to the left / right side
    switch ($pad_type) {
      case STR_PAD_LEFT:
        $leftLength = $diff;
        $rightLength = 0;
        break;

      case STR_PAD_BOTH:
        // the cast binds tighter than "/", so "(int)$diff / 2" would be a float for odd values;
        // the left side gets the rounded-down half, the right side the remainder
        $leftLength = (int)floor($diff / 2);
        $rightLength = $diff - $leftLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $leftLength = 0;
        $rightLength = $diff;
    }

    $pre = '';
    if ($leftLength > 0) {
      // repeat the pad string often enough, then cut it to the exact length
      $pre = str_repeat($pad_string, (int)ceil($leftLength / $ps_length));
      $pre = (string)self::substr($pre, 0, $leftLength);
    }

    $post = '';
    if ($rightLength > 0) {
      $post = str_repeat($pad_string, (int)ceil($rightLength / $ps_length));
      $post = (string)self::substr($post, 0, $rightLength);
    }

    return $pre . $str . $post;
  }
4718
4719
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4743
4744
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation; "$count" is filled by the native function through the reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4777
4778
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject, unchanged if the search term does not occur.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstHit = self::strpos($subject, $search);

    if ($firstHit === false) {
      return $subject;
    }

    $searchLength = self::strlen($search);

    return self::substr_replace($subject, $replace, $firstHit, $searchLength);
  }
4797
4798
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // shuffle on character level, never on byte level
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4813
4814
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $byValue = array_flip($codePoints);
      $codePoints = array_flip($byValue);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4839
4840
  /**
   * Split a string into an array of grapheme clusters, "$len" clusters per element.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len [optional] <p>Number of grapheme clusters per array element.</p>
   *
   * @return array
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      // let the native function deal with (and report) the invalid length
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue "$len" consecutive clusters together per element
    $arrayOutput = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $arrayOutput[] = implode('', $group);
    }

    return $arrayOutput;
  }
4884
4885
  /**
   * Check if the string starts with the given substring.
   *
   * An empty haystack or an empty needle always yields <strong>false</strong>.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the needle is a prefix exactly when its first occurrence is at offset 0
    $firstHit = self::strpos($haystack, $needle);

    return $firstHit === 0;
  }
4908
4909
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are written as one big binary number: leading zero bits are
   * dropped, and an empty string (or a string of NUL bytes only) yields "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string consisting of "0" and "1".</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '0';
    }

    // Convert byte by byte: "base_convert()" on the whole hex dump silently loses
    // precision as soon as the number no longer fits into a native integer / float.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // same format as before: no leading zeros
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4924
4925
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the word pattern as a captured delimiter, so the result
   * alternates between the text between words and the words themselves.
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>Remove values that are not longer than this number of characters.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested: hand back the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $kept = array();
    foreach ($parts as $part) {
      $tooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      $blank = $removeEmptyValues === true && trim($part) === '';
      if ($blank) {
        continue;
      }

      $kept[] = $part;
    }

    return $kept;
  }
4986
4987
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
5002
5003
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the parts alternate: [text between words, word, text between words, word, ...]
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    if ($format === 1) {
      $words = array();
      $i = 1;
      while ($i < $total) {
        $words[] = $parts[$i];
        $i += 2;
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      // the first word starts behind the leading non-word part
      $position = self::strlen($parts[0]);
      for ($i = 1; $i < $total; $i += 2) {
        $word = $parts[$i];
        $words[$position] = $word;
        $position += self::strlen($word) + self::strlen($parts[$i + 1]);
      }

      return $words;
    }

    return ($total - 1) / 2;
  }
5046
5047
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both sides first, then compare them case-sensitively
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5065
5066
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5083
5084
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are identical are reported as equal right away; everything else is
   * compared in NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // cheap shortcut, no normalization needed
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5104
5105
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the initial segment.</p>
   * @param int    $offset   [optional] <p>Start position handed on to "UTF8::substr()".</p>
   * @param int    $length   [optional] <p>Length handed on to "UTF8::substr()".</p>
   *
   * @return int|null <p>
   *                  The number of characters before the first character of the mask, or
   *                  <strong>null</strong> if the mask or the (sliced) string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = $charList . '';
    if ($charList === '') {
      return null;
    }

    // only slice when the caller asked for a sub-range
    if ($offset || $length !== null) {
      $slice = self::substr($str, $offset, $length);
      if ($slice === false) {
        return null;
      }
      $str = (string)$slice;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first character of the mask
    $found = preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $matches);
    if ($found) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no character of the mask occurs at all
    return self::strlen($str);
  }
5141
5142
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5159
5160
  /**
5161
   * Create a UTF-8 string from code points.
5162
   *
5163
   * INFO: opposite to UTF8::codepoints()
5164
   *
5165
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
5166 2
   *
5167
   * @return string <p>UTF-8 encoded string.</p>
5168 2
   */
5169 2
  public static function string(array $array)
  {
    // Convert each code point via chr() and append the results in input order.
    $result = '';
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5182
5183
  /**
5184
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
5185
   *
5186
   * @param string $str <p>The input string.</p>
5187 3
   *
5188
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
5189 3
   */
5190 3
  public static function string_has_bom($str)
  {
    // Only the keys of self::$BOM (the BOM byte sequences) are needed here.
    foreach (array_keys(self::$BOM) as $bom) {
      // Position 0 means the string begins with this BOM.
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5200
5201
  /**
5202
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
5203
   *
5204
   * @link http://php.net/manual/en/function.strip-tags.php
5205
   *
5206
   * @param string  $str            <p>
5207
   *                                The input string.
5208
   *                                </p>
5209
   * @param string  $allowable_tags [optional] <p>
5210
   *                                You can use the optional second parameter to specify tags which should
5211
   *                                not be stripped.
5212
   *                                </p>
5213
   *                                <p>
5214
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
5215
   *                                can not be changed with allowable_tags.
5216
   *                                </p>
5217
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5218 2
   *
5219
   * @return string <p>The stripped string.</p>
5220 2
   */
5221 View Code Duplication
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Optionally clean the input via self::clean() before stripping the tags.
    $subject = $cleanUtf8 === true ? self::clean($str) : $str;

    return strip_tags($subject, $allowable_tags);
  }
5235
5236
  /**
5237
   * Finds position of first occurrence of a string within another, case insensitive.
5238
   *
5239
   * @link http://php.net/manual/en/function.mb-stripos.php
5240
   *
5241
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
5242
   * @param string  $needle    <p>The string to find in haystack.</p>
5243
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
5244
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5245
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5246
   *
5247
   * @return int|false <p>
5248
   *                   Return the numeric position of the first occurrence of needle in the haystack string,<br>
5249 10
   *                   or false if needle is not found.
5250
   *                   </p>
5251 10
   */
5252 10
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // Nothing can be found in, or with, an empty string.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;

    if ($useGrapheme) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5296
5297
  /**
5298
   * Returns all of haystack starting from and including the first occurrence of needle to the end.
5299
   *
5300
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5301
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5302
   * @param bool    $before_needle [optional] <p>
5303
   *                               If <b>TRUE</b>, grapheme_strstr() returns the part of the
5304
   *                               haystack before the first occurrence of the needle (excluding the needle).
5305
   *                               </p>
5306
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
5307
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5308 17
   *
5309
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found.
5310 17
   */
5311 17
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    // Nothing can be found in, or with, an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // Cleaning may leave nothing of the needle; in that case the haystack is returned as-is.
    // Compare against '' explicitly: a loose truthiness test would also treat the
    // perfectly valid needle "0" as empty and return the whole haystack for it.
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only mbstring understands encodings other than UTF-8 here; warn but carry on.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // Pure ASCII input can safely use the byte-oriented native function.
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // Last resort: lazily capture everything in front of the first
    // case-insensitive occurrence of the (quoted) needle.
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // Everything from the needle onwards = haystack minus the captured prefix.
    return self::substr($haystack, self::strlen($match[1]));
  }
5378
5379
  /**
5380
   * Get the string length, not the byte-length!
5381
   *
5382
   * @link     http://php.net/manual/en/function.mb-strlen.php
5383
   *
5384
   * @param string  $str       <p>The string being checked for length.</p>
5385
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5386
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5387
   *
5388 88
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
5389
   *             character counted as +1)</p>
5390 88
   */
5391
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // These encodings are measured byte-wise.
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $plainStrlenIsSafe = $encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false;

        return $plainStrlenIsSafe ? strlen($str) : \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // Foreign encoding without mbstring: either warn (no iconv either) or let iconv count.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      if (self::$SUPPORT['iconv'] === false) {
        trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      } elseif (self::$SUPPORT['iconv'] === true) {
        return \iconv_strlen($str, $encoding);
      }
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;
    if ($useGrapheme) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $parts);
    $charCount = count($parts[0]);

    // fallback to "mb_"-function via polyfill when the regex matched nothing
    return $charCount !== 0 ? $charCount : \mb_strlen($str, $encoding);
  }
5486
5487
  /**
5488
   * Case insensitive string comparisons using a "natural order" algorithm.
5489
   *
5490
   * INFO: natural order version of UTF8::strcasecmp()
5491
   *
5492
   * @param string $str1 <p>The first string.</p>
5493
   * @param string $str2 <p>The second string.</p>
5494
   *
5495 1
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
5496
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
5497 1
   *             <strong>0</strong> if they are equal
5498
   */
5499
  public static function strnatcasecmp($str1, $str2)
  {
    // Case-fold both sides first, then reuse the natural-order comparison.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5503
5504
  /**
5505
   * String comparisons using a "natural order" algorithm
5506
   *
5507
   * INFO: natural order version of UTF8::strcmp()
5508
   *
5509
   * @link  http://php.net/manual/en/function.strnatcmp.php
5510
   *
5511
   * @param string $str1 <p>The first string.</p>
5512
   * @param string $str2 <p>The second string.</p>
5513
   *
5514 2
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
5515
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
5516 2
   *             <strong>0</strong> if they are equal
5517
   */
5518
  public static function strnatcmp($str1, $str2)
  {
    // Identical strings need no folding at all.
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5522
5523
  /**
5524
   * Case-insensitive string comparison of the first n characters.
5525
   *
5526
   * @link  http://php.net/manual/en/function.strncasecmp.php
5527
   *
5528
   * @param string $str1 <p>The first string.</p>
5529
   * @param string $str2 <p>The second string.</p>
5530
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
5531
   *
5532 1
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
5533
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
5534 1
   *             <strong>0</strong> if they are equal
5535
   */
5536
  public static function strncasecmp($str1, $str2, $len)
  {
    // Case-fold both sides first, then compare their first $len characters.
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5540
5541
  /**
5542
   * String comparison of the first n characters.
5543
   *
5544
   * @link  http://php.net/manual/en/function.strncmp.php
5545
   *
5546
   * @param string $str1 <p>The first string.</p>
5547
   * @param string $str2 <p>The second string.</p>
5548
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
5549
   *
5550 2
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
5551
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
5552 2
   *             <strong>0</strong> if they are equal
5553 2
   */
5554
  public static function strncmp($str1, $str2, $len)
  {
    // Cut both strings down to their first $len characters ...
    $head1 = self::substr($str1, 0, $len);
    $head2 = self::substr($str2, 0, $len);

    // ... and compare what is left (a failed substr() is compared as '').
    return self::strcmp((string)$head1, (string)$head2);
  }
5561
5562
  /**
5563
   * Search a string for any of a set of characters.
5564
   *
5565
   * @link  http://php.net/manual/en/function.strpbrk.php
5566
   *
5567
   * @param string $haystack  <p>The string where char_list is looked for.</p>
5568 1
   * @param string $char_list <p>This parameter is case sensitive.</p>
5569
   *
5570 1
   * @return string|false String starting from the character found, or false if it is not found.
5571 1
   */
5572
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // Find the first character of the haystack that belongs to the list.
    $found = array();
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
      return false;
    }

    // Return the haystack from that character's byte position onwards.
    $bytePosition = strpos($haystack, $found[0]);

    return substr($haystack, $bytePosition);
  }
5587
5588
  /**
5589
   * Find position of first occurrence of string in a string.
5590
   *
5591
   * @link http://php.net/manual/en/function.mb-strpos.php
5592
   *
5593
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
5594
   * @param string  $needle    <p>The string to find in haystack.</p>
5595
   * @param int     $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
5596
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5597
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5598
   *
5599
   * @return int|false <p>
5600 58
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
5601
   *                   If needle is not found it returns false.
5602 58
   *                   </p>
5603 58
   */
5604
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: turn a code point into its
    // character. This has to happen BEFORE the string cast below, otherwise the
    // integer test can never succeed.
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Nothing can be found in, or with, an empty string.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is searched with the native byte-wise function (unless mbstring overloads it).
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // Warn only when NEITHER extension is available for a foreign encoding
    // (the same rule strlen() applies); processing continues afterwards.
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      // ignore invalid negative offset to keep compatibility
      // with php < 5.5.35, < 5.6.21, < 7.0.6
      return \iconv_strpos($haystack, $needle, $offset > 0 ? $offset : 0, $encoding);
    }

    // Pure ASCII input can safely use the byte-oriented native function.
    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // A negative offset counts from the end of the haystack. Translate it into an
    // absolute character position so the result stays relative to the FULL haystack.
    if ($offset < 0) {
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    // Drop the first $offset characters, then search byte-wise in the remainder.
    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    $pos = strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // Convert the byte position into a character count and add the skipped characters back.
    return $offset + self::strlen(substr($haystackTmp, 0, $pos));
  }
5733
5734
  /**
5735
   * Finds the last occurrence of a character in a string within another.
5736
   *
5737
   * @link http://php.net/manual/en/function.mb-strrchr.php
5738
   *
5739
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
5740
   * @param string $needle        <p>The string to find in haystack</p>
5741
   * @param bool   $before_needle [optional] <p>
5742
   *                              Determines which portion of haystack
5743
   *                              this function returns.
5744
   *                              If set to true, it returns all of haystack
5745
   *                              from the beginning to the last occurrence of needle.
5746
   *                              If set to false, it returns all of haystack
5747
   *                              from the last occurrence of needle to the end,
5748
   *                              </p>
5749
   * @param string $encoding      [optional] <p>
5750
   *                              Character encoding name to use.
5751
   *                              If it is omitted, internal character encoding is used.
5752
   *                              </p>
5753 1
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5754
   *
5755 1
   * @return string|false The portion of haystack or false if needle is not found.
5756 1
   */
5757 1 View Code Duplication
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Anything but the literal "UTF-8" goes through the encoding normalizer.
    $encoding = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5773
5774
  /**
5775
   * Reverses characters order in the string.
5776
   *
5777 4
   * @param string $str The input string
5778
   *
5779 4
   * @return string The string with characters in the reverse sequence
5780
   */
5781 4
  public static function strrev($str)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Split the string, flip the order of the pieces, glue them back together.
    $characters = self::split($str);

    return implode('', array_reverse($characters));
  }
5791
5792
  /**
5793
   * Finds the last occurrence of a character in a string within another, case insensitive.
5794
   *
5795
   * @link http://php.net/manual/en/function.mb-strrichr.php
5796
   *
5797
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
5798
   * @param string  $needle        <p>The string to find in haystack.</p>
5799
   * @param bool    $before_needle [optional] <p>
5800
   *                               Determines which portion of haystack
5801
   *                               this function returns.
5802
   *                               If set to true, it returns all of haystack
5803
   *                               from the beginning to the last occurrence of needle.
5804
   *                               If set to false, it returns all of haystack
5805
   *                               from the last occurrence of needle to the end,
5806
   *                               </p>
5807
   * @param string  $encoding      [optional] <p>
5808
   *                               Character encoding name to use.
5809
   *                               If it is omitted, internal character encoding is used.
5810
   *                               </p>
5811 1
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5812
   *
5813 1
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
5814 1
   */
5815 1 View Code Duplication
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Anything but the literal "UTF-8" goes through the encoding normalizer.
    $encoding = $encoding === 'UTF-8' ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5830
5831
  /**
5832
   * Find position of last occurrence of a case-insensitive string.
5833
   *
5834
   * @param string  $haystack  <p>The string to look in.</p>
5835
   * @param string  $needle    <p>The string to look for.</p>
5836
   * @param int     $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
5837
   * @param string  $encoding  [optional] <p>Set the charset.</p>
5838
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5839
   *
5840
   * @return int|false <p>
5841 1
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
5842
   *                   not found, it returns false.
5843 1
   *                   </p>
5844
   */
5845
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is converted via chr() before the string cast.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $mustClean = $cleanUtf8 === true || $encoding === true;
    if ($mustClean) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstringSupport = self::$SUPPORT['mbstring'];

    // Only mbstring understands encodings other than UTF-8 here; warn but carry on.
    if ($encoding !== 'UTF-8' && $mbstringSupport === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($mbstringSupport === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   && self::$SUPPORT['intl'] === true
                   && Bootup::is_php('5.4') === true;
    if ($useGrapheme) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides and search case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5911
5912
  /**
5913
   * Find position of last occurrence of a string in a string.
5914
   *
5915
   * @link http://php.net/manual/en/function.mb-strrpos.php
5916
   *
5917
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5918
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
5919
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5920
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5921
   *                              the end of the string.
5922
   *                              </p>
5923
   * @param string     $encoding  [optional] <p>Set the charset.</p>
5924
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5925 10
   *
5926
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
5927 10
   *                   is not found, it returns false.</p>
5928 2
   */
5929 2
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is treated as a code point and turned into its character.
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // Nothing can be found in an empty haystack or with an empty needle.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // A boolean $encoding is the legacy position of the "clean" flag (old call signature).
    if ($cleanUtf8 === true || $encoding === true) {
      // the native search functions do not tolerate invalid byte sequences
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (is_bool($encoding) || $encoding === 'UTF-8') {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: mbstring handles every encoding natively.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // grapheme_strrpos() is called without an encoding argument, so it is only used for UTF-8 input.
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;
    if ($canUseIntl) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // Pure PHP fallback: narrow the haystack according to the offset, search byte-wise,
    // then translate the byte position back into a character position.
    $window = null;
    if ($offset > 0) {
      $window = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // a negative offset cuts characters off the end; positions still count from the start
      $window = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($window === false) {
      $haystack = '';
    } elseif ($window !== null) {
      $haystack = (string)$window;
    }

    $bytePos = strrpos($haystack, $needle);

    return ($bytePos === false)
        ? false
        : $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
6014
6015
  /**
6016
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
6017
   * mask.
6018
   *
6019
   * @param string $str    <p>The input string.</p>
6020
   * @param string $mask   <p>The mask of chars</p>
6021
   * @param int    $offset [optional]
6022 10
   * @param int    $length [optional]
6023
   *
6024 10
   * @return int
6025 2
   */
6026 2
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // Restrict the subject to the requested window before measuring.
    $needsSlice = $offset || $length !== null;
    if ($needsSlice) {
      $slice = self::substr($str, $offset, $length);
      $str = ($slice === false) ? '' : (string)$slice;
    }

    $str = (string)$str;
    if ($str === '' || !isset($mask[0])) {
      return 0;
    }

    // Match the longest leading run of characters taken from the mask.
    $found = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
      return 0;
    }

    return self::strlen($found[0]);
  }
6043
6044
  /**
6045
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
6046
   *
6047
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
6048
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
6049
   * @param bool    $before_needle [optional] <p>
6050
   *                               If <b>TRUE</b>, strstr() returns the part of the
6051
   *                               haystack before the first occurrence of the needle (excluding the needle).
6052
   *                               </p>
6053
   * @param string  $encoding      [optional] <p>Set the charset.</p>
6054 2
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
6055
   *
6056 2
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
6057 2
   */
6058
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or needle can never produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid byte sequences would otherwise disturb the native search functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: mbstring handles every encoding natively.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // grapheme_strstr() is called without an encoding argument, so it is only used for UTF-8 input.
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;
    if ($canUseIntl) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // Regex fallback: lazily capture everything in front of the first occurrence of the needle.
    $parts = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $parts);

    if (!isset($parts[1])) {
      return false;
    }

    $prefix = $parts[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
6116
6117
  /**
6118
   * Unicode transformation for case-less matching.
6119
   *
6120
   * @link http://unicode.org/reports/tr21/tr21-5.html
6121
   *
6122
   * @param string  $str       <p>The input string.</p>
6123
   * @param bool    $full      [optional] <p>
6124
   *                           <b>true</b>, replace full case folding chars (default)<br>
6125
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
6126
   *                           </p>
6127 13
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6128
   *
6129
   * @return string
6130 13
   */
6131
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    // Search/replace lists for the common folding table are built once and reused across calls.
    static $commonFoldFrom = null;
    static $commonFoldTo = null;
    // Full case-folding data is only loaded on the first call that requests it.
    static $fullFoldData = null;

    if ($commonFoldFrom === null) {
      $commonFoldFrom = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldTo = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldFrom, $commonFoldTo, $str);

    if ($full) {
      if ($fullFoldData === null) {
        $fullFoldData = self::getData('caseFolding_full');
      }

      // index 0 is used as the search list, index 1 as the replacement list
      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldData[0], $fullFoldData[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Finish by lowercasing whatever the folding tables did not cover.
    return self::strtolower($str);
  }
6168
6169
  /**
6170
   * Make a string lowercase.
6171
   *
6172
   * @link http://php.net/manual/en/function.mb-strtolower.php
6173
   *
6174
   * @param string      $str       <p>The string being lowercased.</p>
6175
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6176
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6177 25
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6178
   *
6179
   * @return string str with all alphabetic characters converted to lowercase.
6180 25
   */
6181 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before handing the string to the native functions
      $str = self::clean($str);
    }

    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    // Without a language hint the mb_* function is all that is needed.
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $canTransliterate = self::$SUPPORT['intl'] === true
                        &&
                        Bootup::is_php('5.4') === true;

    if (!$canTransliterate) {
      // language-specific rules are unavailable: warn and fall back to the generic conversion
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    // Use the language-specific transliterator if it is known, else the generic "Any-Lower".
    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6226
6227
  /**
6228
   * Generic case sensitive transformation for collation matching.
6229
   *
6230 3
   * @param string $str <p>The input string</p>
6231
   *
6232
   * @return string
6233 3
   */
6234
  private static function strtonatfold($str)
  {
    // Decompose to NFD so that combining marks become separate code points ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... and then drop every run of non-spacing marks (\p{Mn}).
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6239
6240
  /**
6241
   * Make a string uppercase.
6242
   *
6243
   * @link http://php.net/manual/en/function.mb-strtoupper.php
6244
   *
6245
   * @param string      $str       <p>The string being uppercased.</p>
6246
   * @param string      $encoding  [optional] <p>Set the charset.</p>
6247
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6248 19
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6249
   *
6250 19
   * @return string str with all alphabetic characters converted to uppercase.
6251
   */
6252 19 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // strip invalid byte sequences before handing the string to the native functions
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // Use the language-specific transliterator if it is known, else the generic "Any-Upper".
        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // FIX: this warning used to name "UTF8::strtolower()" (copy & paste from the sibling method),
      // which pointed users at the wrong function.
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    // No language hint, or intl is unavailable: generic conversion via mbstring.
    return \mb_strtoupper($str, $encoding);
  }
6296
6297
  /**
6298
   * Translate characters or replace sub-strings.
6299
   *
6300
   * @link  http://php.net/manual/en/function.strtr.php
6301
   *
6302
   * @param string          $str  <p>The string being translated.</p>
6303
   * @param string|string[] $from <p>The string replacing from.</p>
6304
   * @param string|string[] $to   <p>The string being translated to.</p>
6305
   *
6306
   * @return string <p>
6307 1
   *                This function returns a copy of str, translating all occurrences of each character in from to the
6308
   *                corresponding character in to.
6309 1
   *                </p>
6310
   */
6311 1
  public static function strtr($str, $from, $to = INF)
  {
    // INF is the "argument not given" marker for $to:
    // - strtr($str, array $pairs)  => replace by pairs
    // - strtr($str, string $from)  => remove every occurrence of $from
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Translating something to itself is a no-op.
    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // FIX: $from / $to are documented as string|string[], but both used to be passed to
      // self::str_split() unconditionally. Only strings are split into characters now;
      // arrays are used as given.
      if (!is_array($from)) {
        $from = self::str_split($from);
      }

      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // Both lists must have the same size: the longer one is truncated.
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      // build the "search => replacement" map expected by the native strtr()
      $from = array_combine($from, $to);
    }

    // Only reached with a string when $to was omitted: remove $from from $str.
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6344
6345
  /**
6346
   * Return the width of a string.
6347
   *
6348
   * @param string  $str       <p>The input string.</p>
6349
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6350 1
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6351
   *
6352 1
   * @return int
6353 1
   */
6354 1
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Anything other than the literal "UTF-8" is normalized first (fallback: UTF-8).
    $charset = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // iconv and mbstring do not tolerate invalid byte sequences,
      // and their behaviour differs from that of PHP's substr
      $str = self::clean($str);
    }

    // "mb_"-function, possibly provided by a polyfill
    return \mb_strwidth($str, $charset);
  }
6369
6370
  /**
6371
   * Changes all keys in an array.
6372
   *
6373
   * @param array $array <p>The array to work on</p>
6374
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
6375
   *                     or <strong>CASE_LOWER</strong> (default)</p>
6376 1
   *
6377
   * @return array|false <p>An array with its keys lower or uppercased, or false if
6378 1
   *                     input is not an array.</p>
6379
   */
6380
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // Every value other than CASE_LOWER (including invalid ones) means "uppercase".
    $toLower = ($case === CASE_LOWER);

    $converted = array();
    foreach ($array as $oldKey => $item) {
      $newKey = $toLower ? self::strtolower($oldKey) : self::strtoupper($oldKey);

      // keys that collide after conversion are overwritten by the later entry
      $converted[$newKey] = $item;
    }

    return $converted;
  }
6407
6408
  /**
6409
   * Get part of a string.
6410
   *
6411
   * @link http://php.net/manual/en/function.mb-substr.php
6412
   *
6413
   * @param string  $str       <p>The string being checked.</p>
6414
   * @param int     $offset    <p>The first position used in str.</p>
6415
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
6416
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6417
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6418
   *
6419 76
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6420
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6421
   *                      characters long, <b>FALSE</b> will be returned.</p>
6422 76
   */
6423
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // Trivial cases: empty input or an explicit zero length.
    if ($str === '' || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring do not tolerate invalid byte sequences,
      // and their behaviour differs from that of PHP's substr
      $str = self::clean($str);
    }

    // No offset and no length: the whole string is requested.
    if (!$offset && $length === null) {
      return $str;
    }

    // The character count is only needed when an offset is given: it detects an offset behind
    // the end of the string and doubles as the default length.
    $charCount = 0;
    if ($offset) {
      $charCount = (int)self::strlen($str, $encoding);

      if ($offset > $charCount) {
        return false;
      }
    }

    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    if (is_bool($encoding) || $encoding === 'UTF-8') {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut byte-wise with the native function, as long as substr() is not overloaded by mbstring.
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    // Preferred path: mbstring handles every encoding natively.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // grapheme_substr() is called without an encoding argument, so it is only used for UTF-8 input.
    $canUseIntl = $encoding === 'UTF-8'
                  &&
                  self::$SUPPORT['intl'] === true
                  &&
                  Bootup::is_php('5.4') === true;
    if ($canUseIntl) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    // For pure ASCII one byte is one character, so the native function is safe.
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // Pure PHP fallback: split into characters, take the requested slice and glue it back together.
    $chars = self::split($str);

    return implode('', array_slice($chars, $offset, $length));
  }
6530
6531
  /**
6532
   * Binary safe comparison of two strings from an offset, up to length characters.
6533
   *
6534
   * @param string  $str1               <p>The main string being compared.</p>
6535
   * @param string  $str2               <p>The secondary string being compared.</p>
6536
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
6537
   *                                    counting from the end of the string.</p>
6538
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
6539
   *                                    the length of the str compared to the length of main_str less the offset.</p>
6540
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6541
   *                                    insensitive.</p>
6542
   *
6543
   * @return int <p>
6544
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
6545 1
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
6546
   *             <strong>0</strong> if they are equal.
6547
   *             </p>
6548
   */
6549 1
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $compareWholeStrings = ($offset === 0 && $length === null);

    if (!$compareWholeStrings) {
      // Cut the requested window out of the main string ...
      $window = self::substr($str1, $offset, $length);
      $str1 = ($window === false) ? '' : (string)$window;

      // ... and shorten the second string to the same number of characters.
      $counterpart = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($counterpart === false) ? '' : (string)$counterpart;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6575
6576
  /**
6577
   * Count the number of substring occurrences.
6578
   *
6579
   * @link  http://php.net/manual/en/function.substr-count.php
6580
   *
6581
   * @param string  $haystack  <p>The string to search in.</p>
6582
   * @param string  $needle    <p>The substring to search for.</p>
6583
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
6584
   * @param int     $length    [optional] <p>
6585
   *                           The maximum length after the specified offset to search for the
6586
   *                           substring. It outputs a warning if the offset plus the length is
6587
   *                           greater than the haystack length.
6588
   *                           </p>
6589
   * @param string  $encoding  <p>Set the charset.</p>
6590 1
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6591
   *
6592
   * @return int|false <p>This function returns an integer or false if there isn't a string.</p>
6593 1
   */
6594 1
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Counting needs both a haystack and a needle.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($offset || $length !== null) {

      // default length: the number of characters in the haystack
      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      // An offset/length pair that sums to zero or less is rejected on PHP < 7.1.
      $sumNotPositive = $length !== 0 && $offset !== 0 && $length + $offset <= 0;
      if (
          $sumNotPositive
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // Only the requested window of the haystack is searched.
      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    $encoding = ($encoding === 'UTF-8') ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // invalid byte sequences would otherwise disturb the native functions
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: mbstring handles every encoding natively.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Regex fallback: every match set is one (non-overlapping) occurrence.
    $occurrences = array();
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return count($occurrences);
  }
6665
6666
  /**
6667
   * Removes a prefix ($needle) from start of the string ($haystack), case insensitive.
6668
   *
6669
   * @param string $haystack <p>The string to search in.</p>
6670 1
   * @param string $needle   <p>The substring to search for.</p>
6671
   *
6672
   * @return string <p>Return the sub-string.</p>
6673 1
   */
6674 1 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // Without a prefix to remove, or without a case-insensitive match, the input is returned as is.
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the prefix is long.
    $rest = self::substr($haystack, self::strlen($needle));

    return ($rest === false) ? '' : (string)$rest;
  }
6698
6699
  /**
6700
   * Removes a suffix ($needle) from end of the string ($haystack), case insensitive.
6701
   *
6702
   * @param string $haystack <p>The string to search in.</p>
6703 1
   * @param string $needle   <p>The substring to search for.</p>
6704
   *
6705
   * @return string <p>Return the sub-string.</p>
6706 1
   */
6707 1 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // Without a suffix to remove, or without a case-insensitive match, the input is returned as is.
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything except the trailing characters covered by the suffix.
    $keep = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keep);

    return ($head === false) ? '' : (string)$head;
  }
6731
6732
  /**
6733
   * Removes a prefix ($needle) from the start of the string ($haystack).
6734
   *
6735
   * @param string $haystack <p>The string to search in.</p>
6736 1
   * @param string $needle   <p>The substring to search for.</p>
6737
   *
6738
   * @return string <p>Return the sub-string.</p>
6739 1
   */
6740 1 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    // Strips $needle from the start of $haystack when it matches (case-sensitive);
    // otherwise the haystack is handed back untouched.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip
    if ($haystack === '') {
      return '';
    }

    // an empty needle, or a needle that is not a prefix, leaves the haystack as it is
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // drop as many leading characters as the needle is long
    $remainder = self::substr($haystack, self::strlen($needle));

    return $remainder === false ? '' : (string)$remainder;
  }
6764
6765
  /**
6766
   * Replace text within a portion of a string.
6767
   *
6768
   * source: https://gist.github.com/stemar/8287074
6769
   *
6770
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
6771
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
6772
   * @param int|int[]       $offset           <p>
6773
   *                                          If start is positive, the replacing will begin at the start'th offset
6774
   *                                          into string.
6775
   *                                          <br><br>
6776
   *                                          If start is negative, the replacing will begin at the start'th character
6777
   *                                          from the end of string.
6778
   *                                          </p>
6779
   * @param int|int[]|void  $length           [optional] <p>If given and is positive, it represents the length of the
6780
   *                                          portion of string which is to be replaced. If it is negative, it
6781
   *                                          represents the number of characters from the end of string at which to
6782
   *                                          stop replacing. If it is not given, then it will default to strlen(
6783
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6784
   *                                          length is zero then this function will have the effect of inserting
6785 7
   *                                          replacement into string at the given start offset.</p>
6786
   *
6787 7
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6788 1
   */
6789
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // Multi-byte aware counterpart of PHP's substr_replace().
    //
    // - $str as array: every argument is expanded to a list of the same size and
    //   the method calls itself once per element; an array is returned.
    // - $str as string: ASCII-only input is delegated to the native function,
    //   everything else is split into characters and spliced.
    if (is_array($str) === true) {
      $num = count($str);

      // the replacement: one entry per input string
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // the offset: entries that are not real integers fall back to 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length
      if (!isset($length)) {
        // An omitted length means "replace up to the end of each string" (see the
        // docblock and the native function). Passing null down lets the per-string
        // call below apply that default; a list of zeros would turn every call
        // into a pure insertion. array_pad() is used because it also works for an
        // empty input list on old PHP versions.
        $length = array_pad(array(), $num, null);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call, one element of each list per invocation
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single input string can only take a single replacement: use the first one
    if (is_array($replacement) === true) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    // nothing to replace in: the result is just the replacement
    if (!isset($str[0])) {
      return $replacement;
    }

    // byte offsets equal character offsets for pure ASCII input,
    // so the native (global) function can be used directly
    if (self::is_ascii($str)) {
      return ($length === null) ?
          substr_replace($str, $replacement, $offset) :
          substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into lists of UTF-8 characters
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    // array_splice() has the same offset / length semantics as substr_replace()
    array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6866
6867
  /**
6868
   * Removes a suffix ($needle) from the end of the string ($haystack).
6869
   *
6870
   * @param string $haystack <p>The string to search in.</p>
6871
   * @param string $needle   <p>The substring to search for.</p>
6872
   *
6873
   * @return string <p>Return the sub-string.</p>
6874
   */
6875 1 View Code Duplication
  public static function substr_right($haystack, $needle)
  {
    // Strips $needle from the end of $haystack when it matches (case-sensitive);
    // otherwise the haystack is handed back untouched.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack has nothing to strip
    if ($haystack === '') {
      return '';
    }

    // an empty needle, or a needle that is not a suffix, leaves the haystack as it is
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the matched suffix (lengths are counted in characters)
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return $remainder === false ? '' : (string)$remainder;
  }
6898
6899
  /**
6900
   * Returns a case swapped version of the string.
6901
   *
6902
   * @param string  $str       <p>The input string.</p>
6903
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6904
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6905
   *
6906
   * @return string <p>Each character's case swapped.</p>
6907
   */
6908 1
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Flips the case of every non-whitespace character: a character that is
    // unchanged by upper-casing is lower-cased, every other one is upper-cased.
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // callback applied to each matched character
    $swapCharCase = function ($hit) use ($encoding) {
      $char = $hit[0];
      $upper = self::strtoupper($char, $encoding);

      // already upper-case (or caseless): hand back the lower-case form
      return $char === $upper ? self::strtolower($char, $encoding) : $upper;
    };

    return preg_replace_callback('/[\S]/u', $swapCharCase, $str);
  }
6942
6943
  /**
6944
   * alias for "UTF8::to_ascii()"
6945
   *
6946
   * @see UTF8::to_ascii()
6947
   *
6948
   * @param string $s
6949
   * @param string $subst_chr
6950
   * @param bool   $strict
6951
   *
6952
   * @return string
6953
   *
6954
   * @deprecated <p>use "UTF8::to_ascii()"</p>
6955
   */
6956
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    // deprecated alias: forwards all arguments unchanged to to_ascii()
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6960
6961
  /**
6962
   * alias for "UTF8::to_iso8859()"
6963
   *
6964
   * @see UTF8::to_iso8859()
6965
   *
6966
   * @param string $str
6967
   *
6968
   * @return string|string[]
6969
   *
6970
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
6971
   */
6972
  public static function toIso8859($str)
  {
    // deprecated alias: forwards the argument unchanged to to_iso8859()
    $converted = self::to_iso8859($str);

    return $converted;
  }
6976
6977
  /**
6978
   * alias for "UTF8::to_latin1()"
6979
   *
6980
   * @see UTF8::to_latin1()
6981
   *
6982
   * @param $str
6983
   *
6984
   * @return string
6985
   *
6986
   * @deprecated <p>use "UTF8::to_latin1()"</p>
6987
   */
6988
  public static function toLatin1($str)
  {
    // deprecated alias: forwards the argument unchanged to to_latin1()
    $converted = self::to_latin1($str);

    return $converted;
  }
6992
6993
  /**
6994
   * alias for "UTF8::to_utf8()"
6995
   *
6996
   * @see UTF8::to_utf8()
6997
   *
6998
   * @param string $str
6999
   *
7000
   * @return string
7001
   *
7002
   * @deprecated <p>use "UTF8::to_utf8()"</p>
7003
   */
7004
  public static function toUTF8($str)
  {
    // deprecated alias: forwards the argument unchanged to to_utf8()
    $converted = self::to_utf8($str);

    return $converted;
  }
7008
7009
  /**
7010
   * Convert a string into ASCII.
7011
   *
7012
   * @param string $str     <p>The input string.</p>
7013
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
7014
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
7015
   *                        performance</p>
7016
   *
7017
   * @return string
7018
   */
7019 21
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // Transliterates a string to ASCII.
    //
    // Steps: return early for ASCII input, clean the string, optionally run the
    // intl transliterator ($strict), then map every remaining non-ASCII character
    // through lookup tables loaded via self::getData(). Characters without a
    // table entry become $unknown.

    // lookup tables kept across calls, indexed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        // transliterator_transliterate() returns false on failure; in that case
        // keep the current string and fall through to the table lookup instead
        // of overwriting $str with a non-string value
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }

      }
    }

    // split into single characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 >= 0 && $ordC0 <= 127) {
        continue;
      }

      // reset for every character, so a lead byte that matches none of the
      // branches below can never reuse the previous character's code point
      $ord = null;

      $ordC1 = ord($c[1]);

      // decode the code point from the lead byte and its trailing bytes
      if ($ordC0 >= 192 && $ordC0 <= 223) {
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      }

      if ($ordC0 >= 224) {
        $ordC2 = ord($c[2]);

        if ($ordC0 <= 239) {
          $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
        }

        if ($ordC0 >= 240) {
          $ordC3 = ord($c[3]);

          if ($ordC0 <= 247) {
            $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
          }

          if ($ordC0 >= 248) {
            $ordC4 = ord($c[4]);

            if ($ordC0 <= 251) {
              $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
            }

            if ($ordC0 >= 252) {
              $ordC5 = ord($c[5]);

              if ($ordC0 <= 253) {
                $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
              }
            }
          }
        }
      }

      // 0xFE / 0xFF are not handled as lead bytes by the decoder above
      if ($ordC0 === 254 || $ordC0 === 255) {
        $c = $unknown;
        continue;
      }

      // no code point could be decoded for this character
      if (null === $ord) {
        $c = $unknown;
        continue;
      }

      // load the table for this bank on first use; a missing table is cached as empty
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      // position of the character inside its bank
      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference left behind by the by-reference loop
    unset($c);

    return implode('', $chars);
  }
7170
7171
  /**
7172
   * Convert a string into "ISO-8859"-encoding (Latin-1).
7173
   *
7174
   * @param string|string[] $str
7175
   *
7176
   * @return string|string[]
7177
   */
7178 2
  public static function to_iso8859($str)
  {
    // Arrays are converted element by element (keys are kept), everything else
    // is cast to string and passed through self::utf8_decode().
    if (is_array($str) === true) {
      return array_map(
          function ($item) {
            return self::to_iso8859($item);
          },
          $str
      );
    }

    $str = (string)$str;

    // nothing to decode in an empty string
    return $str === '' ? '' : self::utf8_decode($str);
  }
7200
7201
  /**
7202
   * alias for "UTF8::to_iso8859()"
7203
   *
7204
   * @see UTF8::to_iso8859()
7205
   *
7206
   * @param string|string[] $str
7207
   *
7208
   * @return string|string[]
7209
   */
7210 1
  public static function to_latin1($str)
  {
    // alias: forwards the argument unchanged to to_iso8859()
    $converted = self::to_iso8859($str);

    return $converted;
  }
7214
7215
  /**
7216
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
7217
   *
7218
   * <ul>
7219
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
7220
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
7221
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
7222
   * case.</li>
7223
   * </ul>
7224
   *
7225
   * @param string|string[] $str                    <p>Any string or array.</p>
7226
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
7227
   *
7228
   * @return string|string[] <p>The UTF-8 encoded string.</p>
7229
   */
7230 20
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // Walks the input byte by byte: byte sequences that already look like UTF-8
    // are copied as they are, every other byte >= 0x80 is converted through
    // self::to_utf8_convert(). Afterwards "\uXXXX" escapes are decoded and,
    // on request, html-entities as well. Arrays are processed element by element.
    if (is_array($str) === true) {
      return array_map(
          function ($item) use ($decodeHtmlEntityToUtf8) {
            return self::to_utf8($item, $decodeHtmlEntityToUtf8);
          },
          $str
      );
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length of the input, even if strlen() is overloaded by mbstring
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';

    for ($pos = 0; $pos < $max; $pos++) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // a stray continuation byte (10xxxxxx) needs conversion, plain bytes do not
        $buf .= (($lead & "\xC0") === "\x80") ? self::to_utf8_convert($lead) : $lead;
        continue;
      }

      // number of trailing bytes the lead byte announces (0 = not a usable lead byte)
      if ($lead <= "\xDF") {
        $tail = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tail = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tail = 3; // looks like 4 bytes UTF8
      } else {
        $tail = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the trailing bytes; bytes beyond the end of the string count as "\x00"
      $sequence = $lead;
      $looksLikeUtf8 = $tail > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $tail; $k++) {
        $next = $pos + $k >= $max ? "\x00" : $str[$pos + $k];

        if ($next >= "\x80" && $next <= "\xBF") {
          $sequence .= $next;
        } else {
          $looksLikeUtf8 = false;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $buf .= $sequence;
        $pos += $tail;
      } else {
        // not valid UTF8 - convert it
        $buf .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($hit) {
          return \mb_convert_encoding(pack('H*', $hit[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7332
7333
  /**
7334
   * @param string $int <p>A single byte (one-character string) that is not valid UTF-8 on its own.</p>
7335
   *
7336
   * @return string
7337
   */
7338 14
  private static function to_utf8_convert($int)
  {
    // Converts one byte into a UTF-8 sequence: bytes listed in the
    // Windows-1252 table are replaced by their table entry, all others
    // are encoded as a two-byte sequence.
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return (string)self::$WIN1252_TO_UTF8[$byteValue];
    }

    // lead byte: 110xxxxx built from the upper bits of the byte
    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    // trailing byte: 10xxxxxx built from the lower six bits
    $trailByte = ($int & "\x3F") | "\x80";

    return $leadByte . $trailByte;
  }
7353
7354
  /**
7355
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
7356
   *
7357
   * INFO: This is slower than "trim()"
7358
   *
7359
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
7360
   * but the check for ASCII (7-Bit) costs more time than we can save here.
7361
   *
7362
   * @param string $str   <p>The string to be trimmed</p>
7363
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
7364
   *
7365
   * @return string <p>The trimmed string.</p>
7366
   */
7367 26
  public static function trim($str = '', $chars = INF)
  {
    // Without a usable $chars list, unicode separators and control characters
    // are stripped from both ends; otherwise ltrim() + rtrim() with $chars.
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $hasCustomChars = !($chars === INF || !$chars);

    if ($hasCustomChars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7382
7383
  /**
7384
   * Makes string's first char uppercase.
7385
   *
7386
   * @param string  $str       <p>The input string.</p>
7387
   * @param string  $encoding  [optional] <p>Set the charset.</p>
7388
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7389
   *
7390
   * @return string <p>The resulting string</p>
7391
   */
7392 14
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Upper-cases the first character and appends the untouched rest of the string.
    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // everything after the first character; a failed substr() counts as "no rest"
    $rest = self::substr($str, 1, null, $encoding);
    if ($rest === false) {
      $rest = '';
    }

    // the first character, upper-cased
    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $rest;
  }
7413
7414
  /**
7415
   * alias for "UTF8::ucfirst()"
7416
   *
7417
   * @see UTF8::ucfirst()
7418
   *
7419
   * @param string  $word
7420
   * @param string  $encoding
7421
   * @param boolean $cleanUtf8
7422
   *
7423
   * @return string
7424
   */
7425 1
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // alias: forwards all arguments unchanged to ucfirst()
    $capitalized = self::ucfirst($word, $encoding, $cleanUtf8);

    return $capitalized;
  }
7429
7430
  /**
7431
   * Uppercase for all words in the string.
7432
   *
7433
   * @param string   $str        <p>The input string.</p>
7434
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
7435
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
7436
   * @param string   $encoding   [optional] <p>Set the charset.</p>
7437
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
7438
   *
7439
   * @return string
7440
   */
7441 8
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Upper-cases the first character of every word in $str.
    //
    // Words listed in $exceptions are left as they are. Pure ASCII input without
    // exceptions / charlist is delegated to PHP's native ucwords().

    // Test for the empty string explicitly: a loose "!$str" check would also
    // swallow the valid input "0".
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $str = self::clean($str);
    }

    // the native function is only usable if neither a charlist nor exceptions are given
    $usePhpDefaultFunctions = !(bool)($charlist . implode('', $exceptions));

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    // NOTE(review): str_to_words() presumably returns the words together with the
    // text between them, since the result is re-joined without a separator - confirm.
    $words = self::str_to_words($str, $charlist);
    $newWords = array();

    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // Skip empty tokens only; a loose "!$word" check would also drop the
      // token "0" and remove it from the result.
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7498
7499
  /**
7500
   * Multi decode html entity & fix urlencoded-win1252-chars.
7501
   *
7502
   * e.g:
7503
   * 'test+test'                     => 'test test'
7504
   * 'D&#252;sseldorf'               => 'Düsseldorf'
7505
   * 'D%FCsseldorf'                  => 'Düsseldorf'
7506
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
7507
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
7508
   * 'Düsseldorf'                   => 'Düsseldorf'
7509
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
7510
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
7511
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
7512
   *
7513
   * @param string $str          <p>The input string.</p>
7514
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
7515
   *
7516
   * @return string
7517
   */
7518 1 View Code Duplication
  public static function urldecode($str, $multi_decode = true)
  {
    // Decodes url-encoding and html-entities, repeating the whole pass until the
    // string stops changing when $multi_decode is true.
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // turn "%uXXXX" escapes into hexadecimal html-entities, so the loop below decodes them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', urldecode($str));
    }

    // ENT_HTML5 is only referenced when Bootup reports PHP >= 5.4
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $previous = $str;

      // one pass: to UTF-8 -> html-entities -> url-encoding -> simple UTF-8 fixes
      $asUtf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(urldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
7549
7550
  /**
7551
   * Return an array with "urlencoded"-win1252 -> UTF-8
7552
   *
7553
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
7554
   *
7555
   * @return array
7556
   */
7557
  public static function urldecode_fix_win1252_chars()
7558
  {
7559
    return array(
7560
        '%20' => ' ',
7561
        '%21' => '!',
7562
        '%22' => '"',
7563
        '%23' => '#',
7564
        '%24' => '$',
7565
        '%25' => '%',
7566
        '%26' => '&',
7567
        '%27' => "'",
7568
        '%28' => '(',
7569
        '%29' => ')',
7570
        '%2A' => '*',
7571
        '%2B' => '+',
7572
        '%2C' => ',',
7573
        '%2D' => '-',
7574
        '%2E' => '.',
7575
        '%2F' => '/',
7576
        '%30' => '0',
7577
        '%31' => '1',
7578
        '%32' => '2',
7579
        '%33' => '3',
7580
        '%34' => '4',
7581
        '%35' => '5',
7582
        '%36' => '6',
7583
        '%37' => '7',
7584
        '%38' => '8',
7585
        '%39' => '9',
7586
        '%3A' => ':',
7587
        '%3B' => ';',
7588
        '%3C' => '<',
7589
        '%3D' => '=',
7590
        '%3E' => '>',
7591
        '%3F' => '?',
7592
        '%40' => '@',
7593
        '%41' => 'A',
7594
        '%42' => 'B',
7595
        '%43' => 'C',
7596
        '%44' => 'D',
7597
        '%45' => 'E',
7598
        '%46' => 'F',
7599
        '%47' => 'G',
7600
        '%48' => 'H',
7601
        '%49' => 'I',
7602
        '%4A' => 'J',
7603
        '%4B' => 'K',
7604
        '%4C' => 'L',
7605
        '%4D' => 'M',
7606
        '%4E' => 'N',
7607
        '%4F' => 'O',
7608
        '%50' => 'P',
7609
        '%51' => 'Q',
7610
        '%52' => 'R',
7611
        '%53' => 'S',
7612
        '%54' => 'T',
7613
        '%55' => 'U',
7614
        '%56' => 'V',
7615
        '%57' => 'W',
7616
        '%58' => 'X',
7617
        '%59' => 'Y',
7618
        '%5A' => 'Z',
7619
        '%5B' => '[',
7620
        '%5C' => '\\',
7621
        '%5D' => ']',
7622
        '%5E' => '^',
7623
        '%5F' => '_',
7624
        '%60' => '`',
7625
        '%61' => 'a',
7626
        '%62' => 'b',
7627
        '%63' => 'c',
7628
        '%64' => 'd',
7629
        '%65' => 'e',
7630
        '%66' => 'f',
7631
        '%67' => 'g',
7632
        '%68' => 'h',
7633
        '%69' => 'i',
7634
        '%6A' => 'j',
7635
        '%6B' => 'k',
7636
        '%6C' => 'l',
7637
        '%6D' => 'm',
7638
        '%6E' => 'n',
7639
        '%6F' => 'o',
7640
        '%70' => 'p',
7641
        '%71' => 'q',
7642
        '%72' => 'r',
7643
        '%73' => 's',
7644
        '%74' => 't',
7645
        '%75' => 'u',
7646
        '%76' => 'v',
7647
        '%77' => 'w',
7648
        '%78' => 'x',
7649
        '%79' => 'y',
7650
        '%7A' => 'z',
7651
        '%7B' => '{',
7652
        '%7C' => '|',
7653
        '%7D' => '}',
7654
        '%7E' => '~',
7655
        '%7F' => '',
7656
        '%80' => '`',
7657
        '%81' => '',
7658
        '%82' => '‚',
7659
        '%83' => 'ƒ',
7660
        '%84' => '„',
7661
        '%85' => '…',
7662
        '%86' => '†',
7663
        '%87' => '‡',
7664
        '%88' => 'ˆ',
7665
        '%89' => '‰',
7666
        '%8A' => 'Š',
7667
        '%8B' => '‹',
7668
        '%8C' => 'Œ',
7669
        '%8D' => '',
7670
        '%8E' => 'Ž',
7671
        '%8F' => '',
7672
        '%90' => '',
7673
        '%91' => '‘',
7674
        '%92' => '’',
7675
        '%93' => '“',
7676
        '%94' => '”',
7677
        '%95' => '•',
7678
        '%96' => '–',
7679
        '%97' => '—',
7680
        '%98' => '˜',
7681
        '%99' => '™',
7682
        '%9A' => 'š',
7683
        '%9B' => '›',
7684
        '%9C' => 'œ',
7685
        '%9D' => '',
7686
        '%9E' => 'ž',
7687
        '%9F' => 'Ÿ',
7688
        '%A0' => '',
7689
        '%A1' => '¡',
7690
        '%A2' => '¢',
7691
        '%A3' => '£',
7692
        '%A4' => '¤',
7693
        '%A5' => '¥',
7694
        '%A6' => '¦',
7695
        '%A7' => '§',
7696
        '%A8' => '¨',
7697
        '%A9' => '©',
7698
        '%AA' => 'ª',
7699
        '%AB' => '«',
7700
        '%AC' => '¬',
7701
        '%AD' => '',
7702
        '%AE' => '®',
7703
        '%AF' => '¯',
7704
        '%B0' => '°',
7705
        '%B1' => '±',
7706
        '%B2' => '²',
7707
        '%B3' => '³',
7708
        '%B4' => '´',
7709
        '%B5' => 'µ',
7710
        '%B6' => '¶',
7711
        '%B7' => '·',
7712
        '%B8' => '¸',
7713
        '%B9' => '¹',
7714
        '%BA' => 'º',
7715
        '%BB' => '»',
7716
        '%BC' => '¼',
7717
        '%BD' => '½',
7718
        '%BE' => '¾',
7719
        '%BF' => '¿',
7720
        '%C0' => 'À',
7721
        '%C1' => 'Á',
7722
        '%C2' => 'Â',
7723
        '%C3' => 'Ã',
7724
        '%C4' => 'Ä',
7725
        '%C5' => 'Å',
7726
        '%C6' => 'Æ',
7727
        '%C7' => 'Ç',
7728
        '%C8' => 'È',
7729
        '%C9' => 'É',
7730
        '%CA' => 'Ê',
7731
        '%CB' => 'Ë',
7732
        '%CC' => 'Ì',
7733
        '%CD' => 'Í',
7734
        '%CE' => 'Î',
7735
        '%CF' => 'Ï',
7736
        '%D0' => 'Ð',
7737
        '%D1' => 'Ñ',
7738
        '%D2' => 'Ò',
7739
        '%D3' => 'Ó',
7740
        '%D4' => 'Ô',
7741
        '%D5' => 'Õ',
7742
        '%D6' => 'Ö',
7743
        '%D7' => '×',
7744
        '%D8' => 'Ø',
7745
        '%D9' => 'Ù',
7746
        '%DA' => 'Ú',
7747
        '%DB' => 'Û',
7748
        '%DC' => 'Ü',
7749
        '%DD' => 'Ý',
7750
        '%DE' => 'Þ',
7751
        '%DF' => 'ß',
7752
        '%E0' => 'à',
7753
        '%E1' => 'á',
7754
        '%E2' => 'â',
7755
        '%E3' => 'ã',
7756
        '%E4' => 'ä',
7757
        '%E5' => 'å',
7758
        '%E6' => 'æ',
7759
        '%E7' => 'ç',
7760
        '%E8' => 'è',
7761
        '%E9' => 'é',
7762
        '%EA' => 'ê',
7763
        '%EB' => 'ë',
7764
        '%EC' => 'ì',
7765
        '%ED' => 'í',
7766
        '%EE' => 'î',
7767
        '%EF' => 'ï',
7768
        '%F0' => 'ð',
7769
        '%F1' => 'ñ',
7770
        '%F2' => 'ò',
7771
        '%F3' => 'ó',
7772
        '%F4' => 'ô',
7773
        '%F5' => 'õ',
7774
        '%F6' => 'ö',
7775
        '%F7' => '÷',
7776
        '%F8' => 'ø',
7777
        '%F9' => 'ù',
7778
        '%FA' => 'ú',
7779
        '%FB' => 'û',
7780
        '%FC' => 'ü',
7781
        '%FD' => 'ý',
7782
        '%FE' => 'þ',
7783
        '%FF' => 'ÿ',
7784
    );
7785
  }
7786
7787
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first normalised via "UTF8::to_utf8()", then every key of
   * self::$UTF8_TO_WIN1252 is replaced by its value, and finally the remaining
   * multi-byte sequences are collapsed byte-wise: code points below 256 become a
   * single byte, everything else becomes "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded single-byte string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the key / value lists are extracted once and re-used on every later call
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if ($UTF8_TO_WIN1252_KEYS_CACHE === null) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, so "strlen()" is by-passed if mbstring overloads it
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    // $i is the read offset and $j the write offset; the string is compacted in place ($j <= $i)
    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0, $j = 0; $i < $len; ++$i, ++$j) {
      // the high nibble of the current byte (string bit-wise AND) tells the sequence length
      switch ($str[$i] & "\xF0") {
        // lead byte of a 2-byte sequence
        case "\xC0":
        case "\xD0":
          // combine the 5 payload bits of the lead byte with the 6 bits of the continuation byte
          $c = (ord($str[$i] & "\x1F") << 6) | ord($str[++$i] & "\x3F");
          $str[$j] = $c < 256 ? self::chr_and_parse_int($c) : '?';
          break;

        // lead byte of a 4-byte sequence: skip one extra byte, then handle it like a 3-byte sequence
        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          ++$i;
          // no break - intentional fall-through

        // lead byte of a 3-byte sequence: not representable, skip its continuation bytes
        case "\xE0":
          $str[$j] = '?';
          $i += 2;
          break;

        // any other byte is copied as it is
        default:
          $str[$j] = $str[$i];
      }
    }

    // cut off the left-over bytes behind the last written position
    return (string)self::substr($str, 0, $j, '8BIT');
  }
7850
7851
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, every key of self::$CP1252_TO_UTF8 that occurs in
   * the result is replaced by its value.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    $input = (string)$str;
    if ($input === '') {
      return '';
    }

    $encoded = \utf8_encode($input);
    if ($encoded === false) {
      return '';
    }
    $encoded = (string)$encoded;

    // without a "\xC2" byte there is nothing for the replacement table to fix
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // the search / replace lists are built once and kept for later calls
    static $search = null;
    static $replace = null;

    if ($search === null) {
      $search = array_keys(self::$CP1252_TO_UTF8);
      $replace = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($search, $replace, $encoded);
  }
7887
7888
  /**
   * Alias of "UTF8::fix_simple_utf8()", kept for backward compatibility.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7901
7902
  /**
   * Returns the table of all known utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               The whitespace characters as values, keyed by the type of whitespace
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7918
7919
  /**
   * Limit the number of words in a string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep.</p>
   * @param string $strAddOn <p>Suffix that is appended when the string was shortened.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1, the unchanged input
   *                if nothing had to be cut off, otherwise the right-trimmed leading words
   *                followed by $strAddOn.
   *                </p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;
    $limit = (int)$limit;

    if ($str === '' || $limit < 1) {
      return '';
    }

    // grab the leading whitespace plus at most $limit "word + trailing whitespace" groups
    preg_match('/^\s*+(?:\S++\s*+){1,' . $limit . '}/u', $str, $found);

    // only shorten when the match exists and does not already cover the whole string
    if (
        isset($found[0])
        &&
        self::strlen($str) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
7955
7956
  /**
   * Wraps a string to a given number of characters
   *
   * The string is split into characters via "UTF8::split()", a one-byte-per-character
   * placeholder string is wrapped with the native "wordwrap()" and the resulting break
   * positions are then mapped back onto the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars holds every character (and every existing break) as one element;
    // $shape mirrors it with exactly one byte per element:
    // "#" = break, " " = space, "?" = any other character
    $chars = array();
    $shape = '';

    foreach (explode($break, $str) as $index => $segment) {
      // every segment except the first one was preceded by a break in the input
      if ($index) {
        $chars[] = $break;
        $shape .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $shape .= ' ' === $char ? ' ' : '?';
      }
    }

    // let the native function decide where to break, using "#" as the break marker
    $shape = wordwrap($shape, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $shapeIndex = -1;
    $breakPos = -1;

    while (false !== ($breakPos = self::strpos($shape, '#', $breakPos + 1))) {
      // move everything in front of the break marker over to the result
      for (++$shapeIndex; $shapeIndex < $breakPos; ++$shapeIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex++]);
      }

      // the marker stands for an existing break or a space: drop that element;
      // otherwise the marker was inserted into a word and no character is consumed
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex++]);
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
8023
8024
  /**
   * Returns the list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8033
8034
}
8035