Completed
Push — master ( a87fa0...1847a1 )
by Lars
06:18
created

UTF8::strpos()   F

Complexity

Conditions 32
Paths 945

Size

Total Lines 129
Code Lines 73

Duplication

Lines 18
Ratio 13.95 %

Code Coverage

Tests 54
CRAP Score 50.0601

Importance

Changes 0
Metric Value
dl 18
loc 129
ccs 54
cts 73
cp 0.7397
rs 2.1166
c 0
b 0
f 0
cc 32
eloc 73
nc 945
nop 5
crap 50.0601

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

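Commonly applied refactorings include Extract Method and, when many parameters or temporary variables are involved, Replace Temp with Query, Introduce Parameter Object, and Preserve Whole Object.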

1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Constructor.
   *
   * Creating an instance triggers the (one-time) environment detection
   * performed by UTF8::checkForSupport().
   */
  public function __construct()
  {
    // the detection itself is guarded, so repeated instantiation stays cheap
    self::checkForSupport();
  }
820
821
  /**
   * Fetch one character from a string: $str[1] like functionality, but
   * delegated to UTF8::substr() instead of plain byte indexing.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for empty input / negative positions.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $pos = (int)$pos;

    // negative positions are rejected here, everything else is handled by UTF8::substr()
    return $pos < 0 ? '' : (string)self::substr($str, $pos, 1);
  }
845
846
  /**
   * Make sure a string starts with the UTF-8 BOM.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report "false"
   *       is handed back unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // a BOM was detected -> nothing to do
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it represents.
   *
   * The digits are read as one big-endian number: leading zero bits are
   * dropped and the remaining bits are left-padded to whole bytes, so
   * "1010" and "00001010" both yield "\x0a".
   *
   * INFO: The conversion works on 8-bit groups instead of base_convert(),
   *       because base_convert() loses precision on long inputs and
   *       pack('H*') pads an odd number of hex digits on the wrong side.
   *
   * @param mixed $bin <p>String consisting of the characters "1" and "0"; other characters are ignored.</p>
   *
   * @return string <p>The decoded bytes, an empty string for empty / non-string / all-zero input.</p>
   */
  public static function binary_to_str($bin)
  {
    // integers, arrays, null, ... never carried usable digits
    if (!\is_string($bin) || $bin === '') {
      return '';
    }

    // base_convert() skipped characters that are no valid digits, keep that leniency
    $bits = (string)\preg_replace('/[^01]/', '', $bin);

    // the value "0" (in any length) maps to an empty string
    $bits = \ltrim($bits, '0');
    if ($bits === '') {
      return '';
    }

    // left-pad to a multiple of 8 bits, so that every group is one full byte
    $missingBits = (8 - (\strlen($bits) % 8)) % 8;
    $bits = \str_repeat('0', $missingBits) . $bits;

    $bytes = '';
    foreach (\str_split($bits, 8) as $octet) {
      $bytes .= \chr(\bindec($octet));
    }

    return $bytes;
  }
884
885
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    return "\xEF\xBB\xBF";
  }
896
897
  /**
   * Run a callback on every character of a string.
   *
   * @alias of UTF8::chr_map()
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback, forwarded unchanged.</p>
   * @param string       $str      <p>The string, forwarded unchanged.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
911
912
  /**
   * Detects which UTF-8 related PHP extensions / features are available and
   * stores the result in self::$SUPPORT.
   *
   * The detection runs only once: as soon as the
   * "already_checked_via_portable_utf8" flag is set, further calls return
   * immediately.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   */
  public static function checkForSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // "&&" always yields a strict boolean, so the flag is true only if the
    // constant exists and the STRING bit is set in "mbstring.func_overload"
    self::$SUPPORT['mbstring_func_overload'] = defined('MB_OVERLOAD_STRING')
                                               &&
                                               (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING);

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? transliterator_list_ids() : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
957
958
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * The character is built via "\IntlChar::chr()" if that class is available,
   * otherwise by manual bit shifting. In the manual path the code point must
   * be an integer in the range 0 - 0x10FFFF; anything else yields null instead
   * of wrapped-around garbage bytes. Results are memoized per
   * code point / encoding pair.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    // init
    static $CHAR_CACHE = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::chr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // the separator keeps the code point and an encoding name that starts
    // with digits (e.g. "850") from running into each other
    $cacheKey = $code_point . '_' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if (self::$SUPPORT['intlChar'] === true) {
      // returns null by itself for code points it cannot handle
      $str = \IntlChar::chr($code_point);
    } elseif (
        // check type of code_point, only if there is no support for "\IntlChar"
        (int)$code_point !== $code_point
        ||
        // chr() would silently wrap values outside of the Unicode range
        $code_point < 0
        ||
        $code_point > 0x10FFFF
    ) {
      $str = null;
    } elseif ($code_point <= 0x7F) {
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    // a failure (null) must stay null and must not be fed into the converter
    if ($str !== null && $encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1038
1039
  /**
   * Casts the given value to an integer and returns the single byte with that value.
   *
   * @param int $int <p>The byte value, it is handed to the native chr() after an int-cast.</p>
   *
   * @return string <p>The one-byte string generated by chr().</p>
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1048
1049
  /**
   * Runs a callback on every character of a string.
   *
   * @param string|array $callback <p>The callback, it receives one character per call.</p>
   * @param string       $str      <p>UTF-8 string, it is split via UTF8::split() first.</p>
   *
   * @return array <p>The return values of the callback, as produced by array_map().</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1063
1064
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The byte length of each character, an empty array for empty input.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // "8BIT" makes strlen() count bytes instead of characters
    $byteLength = function ($character) {
      return UTF8::strlen($character, '8BIT');
    };

    return array_map($byteLength, self::split($str));
  }
1091
1092
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes are read (1 to 4), the payload bits of
   * these bytes are then combined into one integer.
   *
   * @param string $char <p>The input character, only its first character is evaluated.</p>
   *
   * @return int <p>The decoded value, 0 for empty input.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // nothing to decode, reading $char[0] would only raise an "uninitialized string offset" notice
    // NOTE(review): assumes that 0 is the wanted result for empty input - confirm against UTF8::ord('')
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx - a missing byte (truncated input) contributes no bits instead of raising a notice
      $continuation = isset($char[$i - 1]) ? self::ord($char[$i - 1]) : 0;
      $code = ($code << 6) + ($continuation & ~0x80);
    }

    return $code;
  }
1131
1132
  /**
   * Get the hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character.</p>
   * @param string $pfix [optional] <p>The prefix, it is passed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The result of UTF8::int_to_hex(), or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // the HTML entity "&#0;" is handed to ord() as an empty string
    $input = ($char === '&#0;') ? '' : $char;

    return self::int_to_hex(self::ord($input), $pfix);
  }
1154
1155
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the input.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1168
1169
  /**
   * Splits a string into smaller chunks and joins them with the given line ending.
   *
   * INFO: The line ending is used as glue between the chunks (implode), so it is not
   *       appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The chunk length, it is passed on to UTF8::split().</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string.</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1182
1183
  /**
   * Removes every byte from a string that does not fit into the UTF-8 byte patterns + optional extras.
   *
   * Order of the steps: strip the invalid bytes, drop the diamond question mark, remove invisible
   * characters, then (only if requested) normalize whitespace, normalize MS Word chars and remove the BOM.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // group 1 collects the well-formed sequences, it is the only group that survives the replacement
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';
    $str = preg_replace($utf8Pattern, '$1', $str);

    $str = self::replace_diamond_question_mark($str, '');
    $str = self::remove_invisible_characters($str);

    // optional steps, each one needs a strict boolean true
    if (true === $normalize_whitespace) {
      $str = self::normalize_whitespace($str, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $str = self::normalize_msword($str);
    }

    if (true === $remove_bom) {
      $str = self::remove_bom($str);
    }

    return $str;
  }
1231
1232
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors and runs UTF8::clean() on the result.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean() is called with: remove BOM = true, normalize whitespace = true,
    // normalize MS Word chars = false, keep non-breaking-spaces = true
    return (string)self::clean($fixed, true, true, false, true);
  }
1259
1260
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string (it is split into characters first)
   *                                    or an array of such strings.</p>
   * @param bool            $u_style    <p>If true, every code point is additionally passed through
   *                                    UTF8::int_to_hex(), otherwise the result of UTF8::ord() is returned.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    $chars = is_string($arg) === true ? self::split($arg) : $arg;

    $codePoints = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $codePoints;
    }

    return array_map(array($class, 'int_to_hex'), $codePoints);
  }
1297
1298
  /**
   * Counts how often every character is used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>It is passed on to UTF8::split() as third argument.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1311
1312
  /**
   * Converts an int-value into a character, by decoding the numeric HTML entity "&#<int>;".
   *
   * @param mixed $int <p>The value that is placed into the numeric entity.</p>
   *
   * @return string <p>The result of UTF8::html_entity_decode() for that entity.</p>
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only touched if Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1329
1330
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The current encoding of the string is detected first. For the targets "UTF-8" and "ISO-8859-1"
   *        the conversion is done by UTF8::to_utf8() / UTF8::to_iso8859() (if forced, or if the detected
   *        encoding is UTF-8, WINDOWS-1252 or ISO-8859-1), everything else goes through mb_convert_encoding().
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>If true, the string is converted even if the detected encoding
   *                         already equals the target encoding.</p>
   *
   * @return string <p>The converted string, or the unchanged input if nothing was / could be converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do for an empty string or an empty target encoding
    if (!isset($str[0], $encoding[0])) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // keep the input, if the detection failed or
    // if it is already in the target encoding and no conversion is forced
    if ($encodingDetected === false || ($force !== true && $encodingDetected === $encoding)) {
      return $str;
    }

    $useOwnConverter = $force === true
                       || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true);

    if ($useOwnConverter === true) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // an empty / falsy conversion result falls back to the input string
    return $strEncoded ? $strEncoded : $str;
  }
1421
1422
  /**
1423
   * Reads entire file into a string.
1424
   *
1425
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1426
   *
1427
   * @link http://php.net/manual/en/function.file-get-contents.php
1428
   *
1429
   * @param string        $filename      <p>
1430
   *                                     Name of the file to read.
1431
   *                                     </p>
1432
   * @param int|false     $flags         [optional] <p>
1433
   *                                     Prior to PHP 6, this parameter is called
1434
   *                                     use_include_path and is a bool.
1435
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1436
   *                                     to trigger include path
1437
   *                                     search.
1438
   *                                     </p>
1439
   *                                     <p>
1440
   *                                     The value of flags can be any combination of
1441
   *                                     the following flags (with some restrictions), joined with the
1442
   *                                     binary OR (|)
1443
   *                                     operator.
1444
   *                                     </p>
1445
   *                                     <p>
1446
   *                                     <table>
1447
   *                                     Available flags
1448
   *                                     <tr valign="top">
1449
   *                                     <td>Flag</td>
1450
   *                                     <td>Description</td>
1451
   *                                     </tr>
1452
   *                                     <tr valign="top">
1453
   *                                     <td>
1454
   *                                     FILE_USE_INCLUDE_PATH
1455
   *                                     </td>
1456
   *                                     <td>
1457
   *                                     Search for filename in the include directory.
1458
   *                                     See include_path for more
1459
   *                                     information.
1460
   *                                     </td>
1461
   *                                     </tr>
1462
   *                                     <tr valign="top">
1463
   *                                     <td>
1464
   *                                     FILE_TEXT
1465
   *                                     </td>
1466
   *                                     <td>
1467
   *                                     As of PHP 6, the default encoding of the read
1468
   *                                     data is UTF-8. You can specify a different encoding by creating a
1469
   *                                     custom context or by changing the default using
1470
   *                                     stream_default_encoding. This flag cannot be
1471
   *                                     used with FILE_BINARY.
1472
   *                                     </td>
1473
   *                                     </tr>
1474
   *                                     <tr valign="top">
1475
   *                                     <td>
1476
   *                                     FILE_BINARY
1477
   *                                     </td>
1478
   *                                     <td>
1479
   *                                     With this flag, the file is read in binary mode. This is the default
1480
   *                                     setting and cannot be used with FILE_TEXT.
1481
   *                                     </td>
1482
   *                                     </tr>
1483
   *                                     </table>
1484
   *                                     </p>
1485
   * @param resource|null $context       [optional] <p>
1486
   *                                     A valid context resource created with
1487
   *                                     stream_context_create. If you don't need to use a
1488
   *                                     custom context, you can skip this parameter by &null;.
1489
   *                                     </p>
1490
   * @param int|null $offset             [optional] <p>
1491
   *                                     The offset where the reading starts.
1492
   *                                     </p>
1493
   * @param int|null $maxLength          [optional] <p>
1494
   *                                     Maximum length of data read. The default is to read until end
1495
   *                                     of file is reached.
1496
   *                                     </p>
1497
   * @param int      $timeout            <p>The time in seconds for the timeout.</p>
1498
   *
1499
   * @param boolean  $convertToUtf8      <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1500
   *                                     or pdf, because they used non default utf-8 chars</p>
1501
   *
1502
   * @return string|false <p>The function returns the read data or false on failure.</p>
1503
   */
1504 3
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;

    // NOTE(review): FILTER_SANITIZE_STRING also strips tags and encodes quotes inside of the file name
    // and it is deprecated as of PHP 8.1 - it is kept here for backward compatibility.
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() fails with false (e.g. for non-scalar input) and the sanitizer can leave an empty string,
    // so stop here with the regular failure value instead of passing an unusable path to the native function
    if ($filename === false || $filename === '') {
      return false;
    }

    // apply the timeout via a http-context, but only if the caller did not provide an own context
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    if (!$flags) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    // the length is only forwarded if it is a real integer, otherwise the file is read until the end
    if (is_int($maxLength) === true) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxLength);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // convert only if the detected encoding differs from UTF-8 (force = false), then clean the result
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1547
1548
  /**
   * Checks if the content of a file starts with a BOM (Byte Order Mark).
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p>The result of UTF8::string_has_bom() for the file content.</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1559
1560
  /**
   * Normalizes strings (also nested inside of arrays and objects) to the given normalization form.
   *
   * - arrays and objects: every element / property is filtered recursively
   * - strings: "\r\n" and "\r" become "\n"; non-ASCII strings are normalized via \Normalizer and,
   *   if that yields nothing, converted via UTF8::encode('UTF-8', ...)
   * - every other type is returned unchanged
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form that is passed to \Normalizer.</p>
   * @param string $leading_combining  <p>Prefix for strings that would otherwise start with a combining mark.</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, so that the isset($n[0]) check below passes for already normalized strings
      $n = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $n = \Normalizer::normalize($var, $normalization_form);

      $var = isset($n[0]) ? $n : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($n[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1623
1624
  /**
1625
   * "filter_input()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1626
   *
1627
   * Gets a specific external variable by name and optionally filters it
1628
   *
1629
   * @link  http://php.net/manual/en/function.filter-input.php
1630
   *
1631
   * @param int    $type          <p>
1632
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1633
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1634
   *                              <b>INPUT_ENV</b>.
1635
   *                              </p>
1636
   * @param string $variable_name <p>
1637
   *                              Name of a variable to get.
1638
   *                              </p>
1639
   * @param int    $filter        [optional] <p>
1640
   *                              The ID of the filter to apply. The
1641
   *                              manual page lists the available filters.
1642
   *                              </p>
1643
   * @param mixed  $options       [optional] <p>
1644
   *                              Associative array of options or bitwise disjunction of flags. If filter
1645
   *                              accepts options, flags can be provided in "flags" field of array.
1646
   *                              </p>
1647
   *
1648
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1649
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1650
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1651
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1652
   * @since 5.2.0
1653
   */
1654 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded to the native function, if the caller really passed a fourth argument
    $var = func_num_args() < 4
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    // normalize the result via UTF8::filter()
    return self::filter($var);
  }
1664
1665
  /**
1666
   * "filter_input_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1667
   *
1668
   * Gets external variables and optionally filters them
1669
   *
1670
   * @link  http://php.net/manual/en/function.filter-input-array.php
1671
   *
1672
   * @param int   $type       <p>
1673
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1674
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1675
   *                          <b>INPUT_ENV</b>.
1676
   *                          </p>
1677
   * @param mixed $definition [optional] <p>
1678
   *                          An array defining the arguments. A valid key is a string
1679
   *                          containing a variable name and a valid value is either a filter type, or an array
1680
   *                          optionally specifying the filter, flags and options. If the value is an
1681
   *                          array, valid keys are filter which specifies the
1682
   *                          filter type,
1683
   *                          flags which specifies any flags that apply to the
1684
   *                          filter, and options which specifies any options that
1685
   *                          apply to the filter. See the example below for a better understanding.
1686
   *                          </p>
1687
   *                          <p>
1688
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1689
   *                          input array are filtered by this filter.
1690
   *                          </p>
1691
   * @param bool  $add_empty  [optional] <p>
1692
   *                          Add missing keys as <b>NULL</b> to the return value.
1693
   *                          </p>
1694
   *
1695
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1696
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1697
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1698
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1699
   * fails.
1700
   * @since 5.2.0
1701
   */
1702 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with only one passed argument the native function is called with the type alone
    $a = func_num_args() < 2
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    // normalize the result via UTF8::filter()
    return self::filter($a);
  }
1712
1713
  /**
1714
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1715
   *
1716
   * Filters a variable with a specified filter
1717
   *
1718
   * @link  http://php.net/manual/en/function.filter-var.php
1719
   *
1720
   * @param mixed $variable <p>
1721
   *                        Value to filter.
1722
   *                        </p>
1723
   * @param int   $filter   [optional] <p>
1724
   *                        The ID of the filter to apply. The
1725
   *                        manual page lists the available filters.
1726
   *                        </p>
1727
   * @param mixed $options  [optional] <p>
1728
   *                        Associative array of options or bitwise disjunction of flags. If filter
1729
   *                        accepts options, flags can be provided in "flags" field of array. For
1730
   *                        the "callback" filter, callable type should be passed. The
1731
   *                        callback must accept one argument, the value to be filtered, and return
1732
   *                        the value after filtering/sanitizing it.
1733
   *                        </p>
1734
   *                        <p>
1735
   *                        <code>
1736
   *                        // for filters that accept options, use this format
1737
   *                        $options = array(
1738
   *                        'options' => array(
1739
   *                        'default' => 3, // value to return if the filter fails
1740
   *                        // other options here
1741
   *                        'min_range' => 0
1742
   *                        ),
1743
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1744
   *                        );
1745
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1746
   *                        // for filter that only accept flags, you can pass them directly
1747
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1748
   *                        // for filter that only accept flags, you can also pass as an array
1749
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1750
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1751
   *                        // callback validate filter
1752
   *                        function foo($value)
1753
   *                        {
1754
   *                        // Expected format: Surname, GivenNames
1755
   *                        if (strpos($value, ", ") === false) return false;
1756
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1757
   *                        $empty = (empty($surname) || empty($givennames));
1758
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1759
   *                        if ($empty || $notstrings) {
1760
   *                        return false;
1761
   *                        } else {
1762
   *                        return $value;
1763
   *                        }
1764
   *                        }
1765
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1766
   *                        </code>
1767
   *                        </p>
1768
   *
1769
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1770
   * @since 5.2.0
1771
   */
1772 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native function only when the caller really
    // supplied a third argument; otherwise the native default must apply,
    // not our placeholder default of null.
    $filtered = func_num_args() < 3
        ? filter_var($variable, $filter)
        : filter_var($variable, $filter, $options);

    // The native result is passed through self::filter() before it is returned.
    return self::filter($filtered);
  }
1782
1783
  /**
1784
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1785
   *
1786
   * Gets multiple variables and optionally filters them
1787
   *
1788
   * @link  http://php.net/manual/en/function.filter-var-array.php
1789
   *
1790
   * @param array $data       <p>
1791
   *                          An array with string keys containing the data to filter.
1792
   *                          </p>
1793
   * @param mixed $definition [optional] <p>
1794
   *                          An array defining the arguments. A valid key is a string
1795
   *                          containing a variable name and a valid value is either a
1796
   *                          filter type, or an
1797
   *                          array optionally specifying the filter, flags and options.
1798
   *                          If the value is an array, valid keys are filter
1799
   *                          which specifies the filter type,
1800
   *                          flags which specifies any flags that apply to the
1801
   *                          filter, and options which specifies any options that
1802
   *                          apply to the filter. See the example below for a better understanding.
1803
   *                          </p>
1804
   *                          <p>
1805
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1806
   *                          input array are filtered by this filter.
1807
   *                          </p>
1808
   * @param bool  $add_empty  [optional] <p>
1809
   *                          Add missing keys as <b>NULL</b> to the return value.
1810
   *                          </p>
1811
   *
1812
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1813
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1814
   * the variable is not set.
1815
   * @since 5.2.0
1816
   */
1817 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults must be used, so the
    // placeholder defaults of this wrapper are not forwarded in that case.
    $filtered = func_num_args() < 2
        ? filter_var_array($data)
        : filter_var_array($data, $definition, $add_empty);

    // The native result is passed through self::filter() before it is returned.
    return self::filter($filtered);
  }
1827
1828
  /**
1829
   * Check if the number of unicode characters is not more than the specified integer.
1830
   *
1831
   * @param string $str      The original string to be checked.
1832
   * @param int    $box_size The size in number of chars to be checked against string.
1833
   *
1834
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1835
   */
1836 1
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by self::strlen(), compared against the box size.
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1840
1841
  /**
1842
   * Try to fix simple broken UTF-8 strings.
1843
   *
1844
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1845
   *
1846
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1847
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1848
   * See: http://en.wikipedia.org/wiki/Windows-1252
1849
   *
1850
   * @param string $str <p>The input string</p>
1851
   *
1852
   * @return string
1853
   */
1854 26 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $input = (string)$str;

    // Nothing to repair in an empty string.
    if ($input === '') {
      return '';
    }

    // The search/replace lists are derived from self::$BROKEN_UTF8_FIX only
    // once and then reused by every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $input);
  }
1873
1874
  /**
1875
   * Fix a double (or multiple) encoded UTF8 string.
1876
   *
1877
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1878
   *
1879
   * @return string|string[] <p>Will return the fixed input-"array" or
1880
   *                         the fixed input-"string".</p>
1881
   */
1882 1
  public static function fix_utf8($str)
  {
    // Arrays are processed element by element (recursively), keeping the keys.
    if (is_array($str)) {
      $fixed = array();
      foreach ($str as $key => $value) {
        $fixed[$key] = self::fix_utf8($value);
      }

      return $fixed;
    }

    // An empty string is returned untouched, without any conversion round.
    if ($str === '') {
      return $str;
    }

    // Keep decoding and re-encoding until a round no longer changes the value.
    do {
      $previous = $str;
      $str = self::to_utf8(self::utf8_decode($previous));
    } while ($previous !== $str);

    return $str;
  }
1906
1907
  /**
1908
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1909
   *
1910
   * @param string $char
1911
   *
1912
   * @return string <p>'RTL' or 'LTR'</p>
1913
   */
1914 1
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Preferred path: ask "IntlChar" and map its direction code.
    if (self::$SUPPORT['intlChar'] === true) {
      $directionCode = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      if (in_array($directionCode, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($directionCode, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code falls through to the table lookup below
    }

    $c = static::chr_to_decimal($char);

    // Everything outside of the overall table bounds is treated as LTR.
    if (!($c >= 0x5be && $c <= 0x10b7f)) {
      return 'LTR';
    }

    if ($c <= 0x85e) {

      // lower block: single code points ...
      $singles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828,
          0x85e,
      );

      // ... and inclusive ranges
      $ranges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif ($c === 0x200f) {

      return 'RTL';

    } elseif ($c >= 0xfb1d) {

      // upper block: single code points ...
      $singles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );

      // ... and inclusive ranges
      $ranges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two blocks only 0x200f (handled above) counts as RTL
      return 'LTR';

    }

    // single code points are matched strictly (type and value)
    if (in_array($c, $singles, true)) {
      return 'RTL';
    }

    foreach ($ranges as $range) {
      if ($c >= $range[0] && $c <= $range[1]) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2028
2029
  /**
2030
   * Get data from "/data/*.php".
2031
   *
2032
   * @param string $file
2033
   *
2034
   * @return bool|string|array|int <p>Will return false on error.</p>
2035
   */
2036 4
  private static function getData($file)
  {
    // Build the full path of the data file next to this class.
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // The data file's own return value is handed back to the caller.
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2046
2047
  /**
2048
   * Check for php-support.
2049
   *
2050
   * @param string|null $key
2051
   *
2052
   * @return mixed <p>Return the full support-"array", if $key === null<br>
2053
   *               return bool-value, if $key is used and available<br>
2054
   *               otherwise return null</p>
2055
   */
2056 19
  public static function getSupportInfo($key = null)
  {
    // Make sure the support table has been populated.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // No key requested: hand out the whole table.
    if ($key === null) {
      return self::$SUPPORT;
    }

    // Unknown (or null-valued) entries yield null.
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2072
2073
  /**
2074
   * alias for "UTF8::string_has_bom()"
2075
   *
2076
   * @see UTF8::string_has_bom()
2077
   *
2078
   * @param string $str
2079
   *
2080
   * @return bool
2081
   *
2082
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2083
   */
2084
  public static function hasBom($str)
  {
    // Deprecated alias: simply delegates to string_has_bom().
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2088
2089
  /**
2090
   * Converts a hexadecimal-value into an UTF-8 character.
2091
   *
2092
   * @param string $hexdec <p>The hexadecimal value.</p>
2093
   *
2094
   * @return string|false <p>One single UTF-8 character.</p>
2095
   */
2096 2
  public static function hex_to_chr($hexdec)
  {
    // hex string -> decimal number -> character
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2100
2101
  /**
2102
   * Converts hexadecimal U+xxxx code point representation to integer.
2103
   *
2104
   * INFO: opposite to UTF8::int_to_hex()
2105
   *
2106
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2107
   *
2108
   * @return int|false <p>The code point, or false on failure.</p>
2109
   */
2110 1
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    if ($hexDec === '') {
      return false;
    }

    // Optional "\u" or "U+" prefix, followed by 4 to 6 hexadecimal digits.
    // Only 0-9 and a-f are accepted: letters like "g" or "z" are not hex
    // digits and intval(..., 16) would silently turn them into 0.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2124
2125
  /**
2126
   * alias for "UTF8::html_entity_decode()"
2127
   *
2128
   * @see UTF8::html_entity_decode()
2129
   *
2130
   * @param string $str
2131
   * @param int    $flags
2132
   * @param string $encoding
2133
   *
2134
   * @return string
2135
   */
2136 1
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // Alias: all arguments are forwarded unchanged to html_entity_decode().
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2140
2141
  /**
2142
   * Converts a UTF-8 string to a series of HTML numbered entities.
2143
   *
2144
   * INFO: opposite to UTF8::html_decode()
2145
   *
2146
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2147
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2148
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2149
   *
2150
   * @return string <p>HTML numbered entities.</p>
2151
   */
2152 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // Encode from code point 0 by default, or from 0x80 upwards when the
      // ASCII range has to stay readable.
      $startCode = 0x00;
      if ($keepAsciiChars === true) {
        $startCode = 0x80;
      }

      // A convmap is built from groups of exactly four integers:
      // (start code, end code, offset, mask). The map must therefore have
      // a multiple of four elements - a stray fifth element is invalid.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode every character on its own.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2190
2191
  /**
2192
   * UTF-8 version of html_entity_decode()
2193
   *
2194
   * The reason we are not using html_entity_decode() by itself is because
2195
   * while it is not technically correct to leave out the semicolon
2196
   * at the end of an entity most browsers will still interpret the entity
2197
   * correctly. html_entity_decode() does not convert entities without
2198
   * semicolons, so we are left with our own little solution here. Bummer.
2199
   *
2200
   * Convert all HTML entities to their applicable characters
2201
   *
2202
   * INFO: opposite to UTF8::html_encode()
2203
   *
2204
   * @link http://php.net/manual/en/function.html-entity-decode.php
2205
   *
2206
   * @param string $str      <p>
2207
   *                         The input string.
2208
   *                         </p>
2209
   * @param int    $flags    [optional] <p>
2210
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2211
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2212
   *                         <table>
2213
   *                         Available <i>flags</i> constants
2214
   *                         <tr valign="top">
2215
   *                         <td>Constant Name</td>
2216
   *                         <td>Description</td>
2217
   *                         </tr>
2218
   *                         <tr valign="top">
2219
   *                         <td><b>ENT_COMPAT</b></td>
2220
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2221
   *                         </tr>
2222
   *                         <tr valign="top">
2223
   *                         <td><b>ENT_QUOTES</b></td>
2224
   *                         <td>Will convert both double and single quotes.</td>
2225
   *                         </tr>
2226
   *                         <tr valign="top">
2227
   *                         <td><b>ENT_NOQUOTES</b></td>
2228
   *                         <td>Will leave both double and single quotes unconverted.</td>
2229
   *                         </tr>
2230
   *                         <tr valign="top">
2231
   *                         <td><b>ENT_HTML401</b></td>
2232
   *                         <td>
2233
   *                         Handle code as HTML 4.01.
2234
   *                         </td>
2235
   *                         </tr>
2236
   *                         <tr valign="top">
2237
   *                         <td><b>ENT_XML1</b></td>
2238
   *                         <td>
2239
   *                         Handle code as XML 1.
2240
   *                         </td>
2241
   *                         </tr>
2242
   *                         <tr valign="top">
2243
   *                         <td><b>ENT_XHTML</b></td>
2244
   *                         <td>
2245
   *                         Handle code as XHTML.
2246
   *                         </td>
2247
   *                         </tr>
2248
   *                         <tr valign="top">
2249
   *                         <td><b>ENT_HTML5</b></td>
2250
   *                         <td>
2251
   *                         Handle code as HTML 5.
2252
   *                         </td>
2253
   *                         </tr>
2254
   *                         </table>
2255
   *                         </p>
2256
   * @param string $encoding [optional] <p>Encoding to use.</p>
2257
   *
2258
   * @return string <p>The decoded string.</p>
2259
   */
2260 16
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Without "&", or without both "&#" and ";", there is nothing to decode.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    // self::$SUPPORT is read below, so make sure it has been populated
    // (same guard as in the other methods that read the support table).
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // Repeat until a full pass no longer changes the string, so that
    // multiply encoded entities are resolved as well.
    do {
      $str_compare = $str;

      // Decimal entities first; entities which decode to a quote character
      // are kept as they are in this step.
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (a missing trailing semicolon is added before decoding)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2335
2336
  /**
2337
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2338
   *
2339
   * @link http://php.net/manual/en/function.htmlentities.php
2340
   *
2341
   * @param string $str           <p>
2342
   *                              The input string.
2343
   *                              </p>
2344
   * @param int    $flags         [optional] <p>
2345
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2346
   *                              invalid code unit sequences and the used document type. The default is
2347
   *                              ENT_COMPAT | ENT_HTML401.
2348
   *                              <table>
2349
   *                              Available <i>flags</i> constants
2350
   *                              <tr valign="top">
2351
   *                              <td>Constant Name</td>
2352
   *                              <td>Description</td>
2353
   *                              </tr>
2354
   *                              <tr valign="top">
2355
   *                              <td><b>ENT_COMPAT</b></td>
2356
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2357
   *                              </tr>
2358
   *                              <tr valign="top">
2359
   *                              <td><b>ENT_QUOTES</b></td>
2360
   *                              <td>Will convert both double and single quotes.</td>
2361
   *                              </tr>
2362
   *                              <tr valign="top">
2363
   *                              <td><b>ENT_NOQUOTES</b></td>
2364
   *                              <td>Will leave both double and single quotes unconverted.</td>
2365
   *                              </tr>
2366
   *                              <tr valign="top">
2367
   *                              <td><b>ENT_IGNORE</b></td>
2368
   *                              <td>
2369
   *                              Silently discard invalid code unit sequences instead of returning
2370
   *                              an empty string. Using this flag is discouraged as it
2371
   *                              may have security implications.
2372
   *                              </td>
2373
   *                              </tr>
2374
   *                              <tr valign="top">
2375
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2376
   *                              <td>
2377
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2378
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2379
   *                              </td>
2380
   *                              </tr>
2381
   *                              <tr valign="top">
2382
   *                              <td><b>ENT_DISALLOWED</b></td>
2383
   *                              <td>
2384
   *                              Replace invalid code points for the given document type with a
2385
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2386
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2387
   *                              instance, to ensure the well-formedness of XML documents with
2388
   *                              embedded external content.
2389
   *                              </td>
2390
   *                              </tr>
2391
   *                              <tr valign="top">
2392
   *                              <td><b>ENT_HTML401</b></td>
2393
   *                              <td>
2394
   *                              Handle code as HTML 4.01.
2395
   *                              </td>
2396
   *                              </tr>
2397
   *                              <tr valign="top">
2398
   *                              <td><b>ENT_XML1</b></td>
2399
   *                              <td>
2400
   *                              Handle code as XML 1.
2401
   *                              </td>
2402
   *                              </tr>
2403
   *                              <tr valign="top">
2404
   *                              <td><b>ENT_XHTML</b></td>
2405
   *                              <td>
2406
   *                              Handle code as XHTML.
2407
   *                              </td>
2408
   *                              </tr>
2409
   *                              <tr valign="top">
2410
   *                              <td><b>ENT_HTML5</b></td>
2411
   *                              <td>
2412
   *                              Handle code as HTML 5.
2413
   *                              </td>
2414
   *                              </tr>
2415
   *                              </table>
2416
   *                              </p>
2417
   * @param string $encoding      [optional] <p>
2418
   *                              Like <b>htmlspecialchars</b>,
2419
   *                              <b>htmlentities</b> takes an optional third argument
2420
   *                              <i>encoding</i> which defines encoding used in
2421
   *                              conversion.
2422
   *                              Although this argument is technically optional, you are highly
2423
   *                              encouraged to specify the correct value for your code.
2424
   *                              </p>
2425
   * @param bool   $double_encode [optional] <p>
2426
   *                              When <i>double_encode</i> is turned off PHP will not
2427
   *                              encode existing html entities. The default is to convert everything.
2428
   *                              </p>
2429
   *
2430
   *
2431
   * @return string the encoded string.
2432
   * </p>
2433
   * <p>
2434
   * If the input <i>string</i> contains an invalid code unit
2435
   * sequence within the given <i>encoding</i> an empty string
2436
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2437
   * <b>ENT_SUBSTITUTE</b> flags are set.
2438
   */
2439 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // The native function leaves backslashes alone, so they are turned into
    // their numeric entity right after the native encoding step.
    $encoded = str_replace(
        '\\',
        '&#92;',
        htmlentities($str, $flags, $encoding, $double_encode)
    );

    // The per-character post-processing below is done for UTF-8 only.
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every distinct character of three or more bytes together with
    // its numeric entity, then swap them all in one pass.
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($encoded) as $position => $byteCount) {
      if ($byteCount < 3) {
        continue;
      }

      $char = self::access($encoded, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $encoded);
  }
2477
2478
  /**
2479
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2480
   *
2481
   * INFO: Take a look at "UTF8::htmlentities()"
2482
   *
2483
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2484
   *
2485
   * @param string $str           <p>
2486
   *                              The string being converted.
2487
   *                              </p>
2488
   * @param int    $flags         [optional] <p>
2489
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2490
   *                              invalid code unit sequences and the used document type. The default is
2491
   *                              ENT_COMPAT | ENT_HTML401.
2492
   *                              <table>
2493
   *                              Available <i>flags</i> constants
2494
   *                              <tr valign="top">
2495
   *                              <td>Constant Name</td>
2496
   *                              <td>Description</td>
2497
   *                              </tr>
2498
   *                              <tr valign="top">
2499
   *                              <td><b>ENT_COMPAT</b></td>
2500
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2501
   *                              </tr>
2502
   *                              <tr valign="top">
2503
   *                              <td><b>ENT_QUOTES</b></td>
2504
   *                              <td>Will convert both double and single quotes.</td>
2505
   *                              </tr>
2506
   *                              <tr valign="top">
2507
   *                              <td><b>ENT_NOQUOTES</b></td>
2508
   *                              <td>Will leave both double and single quotes unconverted.</td>
2509
   *                              </tr>
2510
   *                              <tr valign="top">
2511
   *                              <td><b>ENT_IGNORE</b></td>
2512
   *                              <td>
2513
   *                              Silently discard invalid code unit sequences instead of returning
2514
   *                              an empty string. Using this flag is discouraged as it
2515
   *                              may have security implications.
2516
   *                              </td>
2517
   *                              </tr>
2518
   *                              <tr valign="top">
2519
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2520
   *                              <td>
2521
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2522
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2523
   *                              </td>
2524
   *                              </tr>
2525
   *                              <tr valign="top">
2526
   *                              <td><b>ENT_DISALLOWED</b></td>
2527
   *                              <td>
2528
   *                              Replace invalid code points for the given document type with a
2529
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2530
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2531
   *                              instance, to ensure the well-formedness of XML documents with
2532
   *                              embedded external content.
2533
   *                              </td>
2534
   *                              </tr>
2535
   *                              <tr valign="top">
2536
   *                              <td><b>ENT_HTML401</b></td>
2537
   *                              <td>
2538
   *                              Handle code as HTML 4.01.
2539
   *                              </td>
2540
   *                              </tr>
2541
   *                              <tr valign="top">
2542
   *                              <td><b>ENT_XML1</b></td>
2543
   *                              <td>
2544
   *                              Handle code as XML 1.
2545
   *                              </td>
2546
   *                              </tr>
2547
   *                              <tr valign="top">
2548
   *                              <td><b>ENT_XHTML</b></td>
2549
   *                              <td>
2550
   *                              Handle code as XHTML.
2551
   *                              </td>
2552
   *                              </tr>
2553
   *                              <tr valign="top">
2554
   *                              <td><b>ENT_HTML5</b></td>
2555
   *                              <td>
2556
   *                              Handle code as HTML 5.
2557
   *                              </td>
2558
   *                              </tr>
2559
   *                              </table>
2560
   *                              </p>
2561
   * @param string $encoding      [optional] <p>
2562
   *                              Defines encoding used in conversion.
2563
   *                              </p>
2564
   *                              <p>
2565
   *                              For the purposes of this function, the encodings
2566
   *                              ISO-8859-1, ISO-8859-15,
2567
   *                              UTF-8, cp866,
2568
   *                              cp1251, cp1252, and
2569
   *                              KOI8-R are effectively equivalent, provided the
2570
   *                              <i>string</i> itself is valid for the encoding, as
2571
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2572
   *                              the same positions in all of these encodings.
2573
   *                              </p>
2574
   * @param bool   $double_encode [optional] <p>
2575
   *                              When <i>double_encode</i> is turned off PHP will not
2576
   *                              encode existing html entities, the default is to convert everything.
2577
   *                              </p>
2578
   *
2579
   * @return string The converted string.
2580
   * </p>
2581
   * <p>
2582
   * If the input <i>string</i> contains an invalid code unit
2583
   * sequence within the given <i>encoding</i> an empty string
2584
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2585
   * <b>ENT_SUBSTITUTE</b> flags are set.
2586
   */
2587 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The exact label "UTF-8" is passed through untouched; any other label is
    // first run through normalize_encoding() (with "UTF-8" as second argument).
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    // Delegate to the native function with the resolved charset.
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2595
2596
  /**
2597
   * Checks whether iconv is available on the server.
2598
   *
2599
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2600
   */
2601 1
  public static function iconv_loaded()
  {
    // extension_loaded() already returns a boolean.
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class
    //
    // Only touch the iconv settings when the extension really exists:
    // calling iconv_set_encoding() without ext/iconv is a fatal
    // "undefined function" error, which would defeat the purpose of a
    // feature-detection method.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2616
2617
  /**
2618
   * alias for "UTF8::decimal_to_chr()"
2619
   *
2620
   * @see UTF8::decimal_to_chr()
2621
   *
2622
   * @param mixed $int
2623
   *
2624
   * @return string
2625
   */
2626 2
  public static function int_to_chr($int)
  {
    // Pure alias: forward the argument unchanged to decimal_to_chr().
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2630
2631
  /**
2632
   * Converts Integer to hexadecimal U+xxxx code point representation.
2633
   *
2634
   * INFO: opposite to UTF8::hex_to_int()
2635
   *
2636
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2637
   * @param string $pfix [optional]
2638
   *
2639
   * @return string <p>The code point, or empty string on failure.</p>
2640
   */
2641 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Anything that is not a genuine integer is treated as a failure.
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits (e.g. 0x41 => "0041");
    // longer values are left as they are.
    $digits = str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2653
2654
  /**
2655
   * Checks whether intl-char is available on the server.
2656
   *
2657
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2658
   */
2659 1
  public static function intlChar_loaded()
  {
    // The version gate is evaluated first; when it fails the class lookup
    // is not attempted at all.
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2667
2668
  /**
2669
   * Checks whether intl is available on the server.
2670
   *
2671
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2672
   */
2673 4
  public static function intl_loaded()
  {
    // extension_loaded() already yields a strict boolean.
    $loaded = extension_loaded('intl');

    return $loaded;
  }
2677
2678
  /**
2679
   * alias for "UTF8::is_ascii()"
2680
   *
2681
   * @see UTF8::is_ascii()
2682
   *
2683
   * @param string $str
2684
   *
2685
   * @return boolean
2686
   *
2687
   * @deprecated <p>use "UTF8::is_ascii()"</p>
2688
   */
2689
  public static function isAscii($str)
  {
    // Deprecated camelCase alias; forwards to is_ascii().
    $result = self::is_ascii($str);

    return $result;
  }
2693
2694
  /**
2695
   * alias for "UTF8::is_base64()"
2696
   *
2697
   * @see UTF8::is_base64()
2698
   *
2699
   * @param string $str
2700
   *
2701
   * @return bool
2702
   *
2703
   * @deprecated <p>use "UTF8::is_base64()"</p>
2704
   */
2705
  public static function isBase64($str)
  {
    // Deprecated camelCase alias; forwards to is_base64().
    $result = self::is_base64($str);

    return $result;
  }
2709
2710
  /**
2711
   * alias for "UTF8::is_binary()"
2712
   *
2713
   * @see UTF8::is_binary()
2714
   *
2715
   * @param string $str
2716
   *
2717
   * @return bool
2718
   *
2719
   * @deprecated <p>use "UTF8::is_binary()"</p>
2720
   */
2721
  public static function isBinary($str)
  {
    // Deprecated camelCase alias; forwards to is_binary().
    $result = self::is_binary($str);

    return $result;
  }
2725
2726
  /**
2727
   * alias for "UTF8::is_bom()"
2728
   *
2729
   * @see UTF8::is_bom()
2730
   *
2731
   * @param string $utf8_chr
2732
   *
2733
   * @return boolean
2734
   *
2735
   * @deprecated <p>use "UTF8::is_bom()"</p>
2736
   */
2737
  public static function isBom($utf8_chr)
  {
    // Deprecated camelCase alias; forwards to is_bom().
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2741
2742
  /**
2743
   * alias for "UTF8::is_html()"
2744
   *
2745
   * @see UTF8::is_html()
2746
   *
2747
   * @param string $str
2748
   *
2749
   * @return boolean
2750
   *
2751
   * @deprecated <p>use "UTF8::is_html()"</p>
2752
   */
2753
  public static function isHtml($str)
  {
    // Deprecated camelCase alias; forwards to is_html().
    $result = self::is_html($str);

    return $result;
  }
2757
2758
  /**
2759
   * alias for "UTF8::is_json()"
2760
   *
2761
   * @see UTF8::is_json()
2762
   *
2763
   * @param string $str
2764
   *
2765
   * @return bool
2766
   *
2767
   * @deprecated <p>use "UTF8::is_json()"</p>
2768
   */
2769
  public static function isJson($str)
  {
    // Deprecated camelCase alias; forwards to is_json().
    $result = self::is_json($str);

    return $result;
  }
2773
2774
  /**
2775
   * alias for "UTF8::is_utf16()"
2776
   *
2777
   * @see UTF8::is_utf16()
2778
   *
2779
   * @param string $str
2780
   *
2781
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2782
   *
2783
   * @deprecated <p>use "UTF8::is_utf16()"</p>
2784
   */
2785
  public static function isUtf16($str)
  {
    // Deprecated camelCase alias; forwards to is_utf16().
    $result = self::is_utf16($str);

    return $result;
  }
2789
2790
  /**
2791
   * alias for "UTF8::is_utf32()"
2792
   *
2793
   * @see UTF8::is_utf32()
2794
   *
2795
   * @param string $str
2796
   *
2797
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2798
   *
2799
   * @deprecated <p>use "UTF8::is_utf32()"</p>
2800
   */
2801
  public static function isUtf32($str)
  {
    // Deprecated camelCase alias; forwards to is_utf32().
    $result = self::is_utf32($str);

    return $result;
  }
2805
2806
  /**
2807
   * alias for "UTF8::is_utf8()"
2808
   *
2809
   * @see UTF8::is_utf8()
2810
   *
2811
   * @param string $str
2812
   * @param bool   $strict
2813
   *
2814
   * @return bool
2815
   *
2816
   * @deprecated <p>use "UTF8::is_utf8()"</p>
2817
   */
2818
  public static function isUtf8($str, $strict = false)
  {
    // Deprecated camelCase alias; forwards both arguments to is_utf8().
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2822
2823
  /**
2824
   * Checks if a string is 7 bit ASCII.
2825
   *
2826
   * @param string $str <p>The string to check.</p>
2827
   *
2828
   * @return bool <p>
2829
   *              <strong>true</strong> if it is ASCII<br>
2830
   *              <strong>false</strong> otherwise
2831
   *              </p>
2832
   */
2833 55
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // The empty string counts as ASCII.
    if (!isset($str[0])) {
      return true;
    }

    // Look for any byte outside the accepted set: TAB, LF, CR, the printable
    // range 0x20-0x7E, plus 0x10 and 0x13.
    // NOTE(review): "\x10" / "\x13" look like decimal 10 / 13 written as hex
    // (LF / CR are already covered by \x0A / \x0D) - confirm before changing.
    $foundOther = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    return (bool)!$foundOther;
  }
2843
2844
  /**
2845
   * Returns true if the string is base64 encoded, false otherwise.
2846
   *
2847
   * @param string $str <p>The input string.</p>
2848
   *
2849
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2850
   */
2851 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    // An empty string is never considered base64.
    if (!isset($str[0])) {
      return false;
    }

    // Strict mode: base64_decode() returns false on characters outside the
    // base64 alphabet.
    $decoded = base64_decode($str, true);
    if ($decoded === false) {
      return false;
    }

    // Compare against false explicitly instead of relying on truthiness:
    // a decoded payload of "0" (input "MA==") is falsy in PHP and used to be
    // rejected although it is perfectly valid base64.
    // The round-trip check rejects non-canonical input (missing padding,
    // embedded whitespace, ...).
    return base64_encode($decoded) === $str;
  }
2866
2867
  /**
2868
   * Check if the input is binary... (this looks like a hack).
2869
   *
2870
   * @param mixed $input
2871
   *
2872
   * @return bool
2873
   */
2874 16
  public static function is_binary($input)
  {
    $input = (string)$input;

    // Empty input is not binary.
    if (!isset($input[0])) {
      return false;
    }

    // A string made up only of "0" and "1" characters is treated as a
    // textual bit-string. ("$" also matches before one trailing newline.)
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. The former "more than 30% NUL
    // bytes" heuristic was dead code: whenever it matched, this check matched
    // as well, so a single scan gives exactly the same result.
    return substr_count($input, "\x00") > 0;
  }
2897
2898
  /**
2899
   * Check if the file is binary.
2900
   *
2901
   * @param string $file
2902
   *
2903
   * @return boolean
2904
   */
2905 1
  public static function is_binary_file($file)
  {
    // Only the first 512 bytes are inspected.
    $block = '';
    $fp = false;

    try {
      // fopen() signals failure by returning false (plus a warning), not by
      // throwing, so the handle has to be checked before it is used:
      // fread()/fclose() on false raise warnings on PHP 5/7 and a TypeError
      // on PHP 8, which "catch (\Exception)" does not intercept.
      $fp = fopen($file, 'rb');
      if ($fp !== false) {
        $block = fread($fp, 512);
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    }

    // Always release the handle, even when reading failed.
    if (is_resource($fp)) {
      fclose($fp);
    }

    // fread() may return false; is_binary() casts its input to string.
    return self::is_binary($block);
  }
2917
2918
  /**
2919
   * Checks if the given string is equal to any "Byte Order Mark".
2920
   *
2921
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2922
   *
2923
   * @param string $str <p>The input string.</p>
2924
   *
2925
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2926
   */
2927 1
  public static function is_bom($str)
  {
    // The BOM byte strings are the *keys* of self::$BOM; the input has to be
    // strictly identical (===) to one of them.
    $knownBoms = array_keys(self::$BOM);

    return in_array($str, $knownBoms, true);
  }
2937
2938
  /**
2939
   * Check if the string contains any html-tags <lall>.
2940
   *
2941
   * @param string $str <p>The input string.</p>
2942
   *
2943
   * @return boolean
2944
   */
2945 1
  public static function is_html($str)
  {
    $str = (string)$str;

    // Nothing to inspect in an empty string.
    if (!isset($str[0])) {
      return false;
    }

    // Search for the first thing that looks like an opening, closing or
    // self-closing tag, optionally carrying attributes.
    $found = array();
    preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str, $found);

    // No captured match => no tag in the string.
    return count($found) !== 0;
  }
2964
2965
  /**
2966
   * Try to check if "$str" is a JSON string.
2967
   *
2968
   * @param string $str <p>The input string.</p>
2969
   *
2970
   * @return bool
2971
   */
2972 1
  public static function is_json($str)
  {
    $str = (string)$str;

    // An empty string is not JSON.
    if (!isset($str[0])) {
      return false;
    }

    $decoded = self::json_decode($str);

    // Only decoded objects and arrays qualify; scalars such as "1" or "true"
    // are rejected even though they decode.
    $isContainer = (is_object($decoded) === true || is_array($decoded) === true);

    // ... and the decoder must not have reported an error.
    return $isContainer && json_last_error() === JSON_ERROR_NONE;
  }
2996
2997
  /**
2998
   * Check if the string is UTF-16.
2999
   *
3000
   * @param string $str <p>The input string.</p>
3001
   *
3002
   * @return int|false <p>
3003
   *                   <strong>false</strong> if it's not UTF-16,<br>
3004
   *                   <strong>1</strong> for UTF-16LE,<br>
3005
   *                   <strong>2</strong> for UTF-16BE.
3006
   *                   </p>
3007
   */
3008 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags as binary is considered a candidate.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders (little-endian first, as before).
    $maybeUTF16LE = self::count_encoding_roundtrip_hits($str, 'UTF-16LE');
    $maybeUTF16BE = self::count_encoding_roundtrip_hits($str, 'UTF-16BE');

    // Equal scores (including 0 and 0) mean no decision can be made.
    if ($maybeUTF16BE === $maybeUTF16LE) {
      return false;
    }

    return $maybeUTF16LE > $maybeUTF16BE ? 1 : 2;
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it's not UTF-32,<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // Only input that is_binary() flags as binary is considered a candidate.
    if (self::is_binary($str) !== true) {
      return false;
    }

    // Score both byte orders (little-endian first, as before).
    $maybeUTF32LE = self::count_encoding_roundtrip_hits($str, 'UTF-32LE');
    $maybeUTF32BE = self::count_encoding_roundtrip_hits($str, 'UTF-32BE');

    // Equal scores (including 0 and 0) mean no decision can be made.
    if ($maybeUTF32BE === $maybeUTF32LE) {
      return false;
    }

    return $maybeUTF32LE > $maybeUTF32BE ? 1 : 2;
  }

  /**
   * Scores how plausible it is that "$str" is encoded in "$encoding".
   *
   * Shared by is_utf16() and is_utf32(), which previously each carried two
   * copies of this logic (one per byte order).
   *
   * The string is converted $encoding -> UTF-8 -> $encoding -> UTF-8. If the
   * first conversion yields a falsy result or the round trip is not stable,
   * the score is 0. Otherwise every key of count_chars() for the converted
   * text that is found (strictly) via in_array() in count_chars() of the
   * original string adds one point.
   *
   * NOTE(review): in_array() searches the *values* of the count_chars()
   * result while the needle is a *key* of the other result - behaviour is
   * preserved as-is here; confirm against count_chars() that this is intended.
   *
   * @param string $str      <p>The (BOM-free) input string.</p>
   * @param string $encoding <p>Candidate encoding, e.g. "UTF-16LE".</p>
   *
   * @return int <p>The number of hits (0 if the round trip failed).</p>
   */
  private static function count_encoding_roundtrip_hits($str, $encoding)
  {
    $hits = 0;

    $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$asUtf8) {
      return $hits;
    }

    $reEncoded = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
    $roundTrip = \mb_convert_encoding($reEncoded, 'UTF-8', $encoding);
    if ($roundTrip !== $asUtf8) {
      return $hits;
    }

    $strChars = self::count_chars($str, true);
    foreach (self::count_chars($roundTrip, true) as $char => $unused) {
      if (in_array($char, $strChars, true) === true) {
        $hits++;
      }
    }

    return $hits;
  }
3116
3117
  /**
3118
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3119
   *
3120
   * @see    http://hsivonen.iki.fi/php-utf8/
3121
   *
3122
   * @param string $str    <p>The string to be checked.</p>
3123
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3124
   *
3125
   * @return bool
3126
   */
3127 60
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // The empty string is trivially valid.
    if (!isset($str[0])) {
      return true;
    }

    // Strict mode: anything detected as UTF-16 / UTF-32 is not UTF-8.
    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    if (self::pcre_utf8_support() !== true) {
      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      //
      // NOTE(review): this shortcut relies on the /u modifier but is taken
      // when pcre_utf8_support() is NOT true - the condition looks inverted;
      // confirm against pcre_utf8_support() before changing it.
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // expected number of octets still to come in the current sequence
    $mUcs4 = 0;  // code point assembled so far
    $mBytes = 1; // total number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Byte length, even when mbstring overloads strlen().
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or the
        // first octet of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }

        continue;
      }

      // mState is non-zero: a continuation octet (10xxxxxx) is required.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $mUcs4 |= ($in & 0x0000003F) << (($mState - 1) * 6);

      if (0 === --$mState) {
        /*
         * End of the multi-octet sequence. mUcs4 now contains the final
         * Unicode code point: check for illegal sequences and code points.
         */
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset the per-character state ($mState is already 0)
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A non-zero state here means the string ended in the middle of a
    // multi-octet sequence (e.g. a lone "\xC3"). Such a truncated string is
    // not valid UTF-8; it used to be accepted because the function returned
    // true unconditionally after the loop.
    return $mState === 0;
  }
3268
3269
  /**
3270
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3271
   * Decodes a JSON string
3272
   *
3273
   * @link http://php.net/manual/en/function.json-decode.php
3274
   *
3275
   * @param string $json    <p>
3276
   *                        The <i>json</i> string being decoded.
3277
   *                        </p>
3278
   *                        <p>
3279
   *                        This function only works with UTF-8 encoded strings.
3280
   *                        </p>
3281
   *                        <p>PHP implements a superset of
3282
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3283
   *                        only supports these values when they are nested inside an array or an object.
3284
   *                        </p>
3285
   * @param bool   $assoc   [optional] <p>
3286
   *                        When <b>TRUE</b>, returned objects will be converted into
3287
   *                        associative arrays.
3288
   *                        </p>
3289
   * @param int    $depth   [optional] <p>
3290
   *                        User specified recursion depth.
3291
   *                        </p>
3292
   * @param int    $options [optional] <p>
3293
   *                        Bitmask of JSON decode options. Currently only
3294
   *                        <b>JSON_BIGINT_AS_STRING</b>
3295
   *                        is supported (default is to cast large integers as floats)
3296
   *                        </p>
3297
   *
3298
   * @return mixed the value encoded in <i>json</i> in appropriate
3299
   * PHP type. Values true, false and
3300
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3301
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3302
   * <i>json</i> cannot be decoded or if the encoded
3303
   * data is deeper than the recursion limit.
3304
   */
3305 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // Run the input through filter() and force a string.
    $json = (string)self::filter($json);

    // The 4th argument ("$options") is only passed when Bootup::is_php('5.4')
    // reports true; otherwise it is dropped.
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($json, $assoc, $depth);
    }

    return json_decode($json, $assoc, $depth, $options);
  }
3317
3318
  /**
   * Returns the JSON representation of a value.
   *
   * The value is passed through "UTF8::filter()" first and is then handed to the native json_encode().
   * The "$depth" argument is only forwarded when "Bootup::is_php('5.5')" returns true.
   *
   * @link http://php.net/manual/en/function.json-encode.php
   *
   * @param mixed $value   <p>The value being encoded (it is filtered via "UTF8::filter()" before encoding).</p>
   * @param int   $options [optional] <p>Bitmask of JSON_* constants, passed to json_encode() unchanged.</p>
   * @param int   $depth   [optional] <p>Maximum depth, only used if "Bootup::is_php('5.5')" is true.</p>
   *
   * @return string|false <p>A JSON encoded string on success or <strong>false</strong> on failure.</p>
   */
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    $filtered = self::filter($value);

    if (Bootup::is_php('5.5') !== true) {
      // fallback: call json_encode() without the depth argument
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3366
3367
  /**
   * Makes the first character of a string lowercase.
   *
   * The string is cut into "first character" and "rest" via "UTF8::substr()",
   * only the first character is passed to "UTF8::strtolower()".
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character, "false" from substr() means: there is no rest
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);

    $head = self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $loweredHead = self::strtolower((string)$head, $encoding, $cleanUtf8);

    return $loweredHead . ($tail === false ? '' : $tail);
  }
3391
3392
  /**
   * Alias for "UTF8::lcfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word      <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The result of "UTF8::lcfirst()".</p>
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::lcfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
3407
3408
  /**
   * Lowercase the first character of all words in the string.
   *
   * The input is split via "UTF8::str_to_words()", every non-empty piece that is not listed
   * in "$exceptions" is passed to "UTF8::lcfirst()" and the pieces are glued together again.
   *
   * INFO: the string "0" is a valid input / word and is kept as it is.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // "0" is falsy in PHP, but it is not an empty string
    if (!$str && $str !== '0' && $str !== 0) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = array();
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip only really empty pieces, never the word "0"
      if (!$word && $word !== '0') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3457
3458
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without "$chars" (INF, '', null, false) every leading character of the unicode
   * categories "Z" (separators) and "C" (control / other) is removed.
   *
   * INFO: the char-list "0" is a valid char-list, e.g. ltrim('007', '0') => '7'
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, so it must be excluded explicitly from the "no chars given" check
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass((string)$chars) . '+/u', '', $str);
  }
3481
3482
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * An array is joined into one string first, then the highest value of
   * "UTF8::codepoints()" is converted back into a character via "UTF8::chr()".
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if there is no code point at all.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native max() must not be called with an empty array (warning in PHP < 8, ValueError in PHP >= 8)
    if (!is_array($codePoints) || count($codePoints) === 0) {
      return '';
    }

    return self::chr(max($codePoints));
  }
3497
3498
  /**
   * Calculates and returns the maximum number of bytes taken by any
   * UTF-8 encoded character in the given string.
   *
   * The byte sizes are taken from "UTF8::chr_size_list()".
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3515
3516
  /**
   * Checks whether mbstring is available on the server.
   *
   * WARNING: if the extension is loaded, the internal mbstring encoding is set to "UTF-8" as a side effect.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3531
3532
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * An array is joined into one string first, then the lowest value of
   * "UTF8::codepoints()" is converted back into a character via "UTF8::chr()".
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string if there is no code point at all.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codePoints = self::codepoints($arg);

    // the native min() must not be called with an empty array (warning in PHP < 8, ValueError in PHP >= 8)
    if (!is_array($codePoints) || count($codePoints) === 0) {
      return '';
    }

    return self::chr(min($codePoints));
  }
3547
3548
  /**
   * Alias for "UTF8::normalize_encoding()": both arguments are forwarded unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding <p>The encoding-"name" input.</p>
   * @param mixed  $fallback <p>Returned by "UTF8::normalize_encoding()" if the encoding is empty.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3564
3565
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order:
   * 1. an empty encoding returns "$fallback"
   * 2. "UTF-8" and every name from "self::$ICONV_ENCODING" is returned untouched
   * 3. a result that was already calculated for this exact input is returned from a static cache
   * 4. otherwise the name is upper-cased, reduced to letters / digits / whitespace and
   *    looked up in the alias table; unknown names are returned upper-cased
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string|mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc. or "$fallback" for an empty encoding.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $resultCache = array();

    if (!$encoding) {
      return $fallback;
    }

    if (
        'UTF-8' === $encoding
        ||
        in_array($encoding, self::$ICONV_ENCODING, true)
    ) {
      return $encoding;
    }

    if (isset($resultCache[$encoding])) {
      return $resultCache[$encoding];
    }

    // the cache is keyed by the untouched input
    $input = $encoding;
    $normalized = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $normalized);

    $aliases = array(
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // every alias maps to a non-empty name, so a plain isset() is enough here
    if (isset($aliases[$lookupKey])) {
      $normalized = $aliases[$lookupKey];
    }

    $resultCache[$input] = $normalized;

    return $normalized;
  }
3671
3672
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of "self::$UTF8_MSWORD" is replaced by its value; the search and
   * replace lists are built once and are kept in static variables.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    static $search = null;
    static $replace = null;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($search === null) {
      // first call with a non-empty string: build both lists from the mapping table
      $search = array_keys(self::$UTF8_MSWORD);
      $replace = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($search, $replace, $str);
  }
3697
3698
  /**
   * Normalize the whitespace.
   *
   * Every character from "self::$WHITESPACE_TABLE" is replaced by a simple space and, unless
   * "$keepBidiUnicodeControls" is set, every character from "self::$BIDI_UNI_CODE_CONTROLS_TABLE" is removed.
   *
   * INFO: both flags are only active if they are exactly (bool) true.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache-key must follow the same strict check that decides below if the "NO-BREAK SPACE"
    // is removed from the table, otherwise a truthy non-bool value (e.g. 1) would store the
    // full table under the key that is used for (bool) true.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $WHITESPACE_CACHE[$cacheKey] = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        /** @noinspection OffsetOperationsInspection */
        unset($WHITESPACE_CACHE[$cacheKey]['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($WHITESPACE_CACHE[$cacheKey]);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3743
3744
  /**
   * Strip all whitespace characters.
   *
   * Every run of characters that matches the class "[[:space:]]" (in unicode mode)
   * is removed from the string, e.g. spaces, tabs and newlines.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string without whitespace.</p>
   */
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    return (string)$stripped;
  }
3763
3764
  /**
   * Format a number with grouped thousands.
   *
   * If at least one separator is longer than one byte, the number is formatted with the
   * single-byte separators "." and "," first and both placeholders are swapped afterwards
   * in one pass. So multi-byte separators work independently of the native support
   * (PHP < 5.4 only used the first byte of each separator).
   *
   * @param float  $number        <p>The number being formatted.</p>
   * @param int    $decimals      [optional] <p>The number of decimal points.</p>
   * @param string $dec_point     [optional] <p>The separator for the decimal point.</p>
   * @param string $thousands_sep [optional] <p>The thousands separator.</p>
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array replaces both placeholders at once, so a "," or "."
      // inside of one separator is never replaced a second time
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3802
3803
  /**
   * Calculates the Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * For every other encoding the character is converted into UTF-8 first. The results
   * are cached in a static array (key: original input + normalized encoding).
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br>
   *             0 for an empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    // init
    static $codePointCache = array();
    $encoding = (string)$encoding;

    // the cache-key is built from the input as it was passed in
    $input = $chr;

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    $cacheKey = $input . $encoding;
    if (isset($codePointCache[$cacheKey])) {
      return $codePointCache[$cacheKey];
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlCode = \IntlChar::ord($chr);
      if ($intlCode) {
        return $codePointCache[$cacheKey] = $intlCode;
      }
    }

    // manual decoding: look at (max.) the first four bytes, unpack() returns a 1-based array
    /** @noinspection CallableParameterUseCaseInTypeContextInspection */
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // 4-byte sequence
      $code = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // 3-byte sequence
      $code = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // 2-byte sequence
      $code = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or an incomplete sequence): the value of the first byte
      $code = $lead;
    }

    return $codePointCache[$cacheKey] = $code;
  }
3869
3870
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never creates variables in the current scope,
   *          the result is always written into the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or if the result is empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3898
3899
  /**
   * Checks if the "u" modifier is available that enables Unicode support in PCRE.
   *
   * The check runs an empty pattern with the "u" modifier against an empty string,
   * errors of that call are suppressed.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3909
3910
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Both boundaries are resolved into code points in the same way:
   * digits only => used as integer, hex digits only => "UTF8::hex_to_int()", everything else => "UTF8::ord()".
   * An empty array is returned if a boundary is empty or if it is resolved into 0.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>The characters between both boundaries (via "UTF8::chr()").</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // [0] => start, [1] => end
    $codePoints = array();

    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    return array_map(
        array(
            '\\voku\\helper\\UTF8',
            'chr',
        ),
        range($codePoints[0], $codePoints[1])
    );
  }
3956
3957
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * One decoding round is: "UTF8::to_utf8()" => "UTF8::html_entity_decode()" => rawurldecode() => "UTF8::fix_simple_utf8()"
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until a round doesn't change the string anymore).</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "%uXXXX" sequences are rewritten into hex html-entities, the decoding loop resolves them afterwards
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $withoutEntities = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($withoutEntities));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
4007
4008
  /**
   * Alias for "UTF8::remove_bom()": the argument is forwarded unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::remove_bom()".</p>
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
4023
4024
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of "self::$BOM" (BOM bytes => byte length) is checked against the
   * beginning of the string, a matching BOM is cut off byte-wise ("8BIT").
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      if (0 !== self::strpos($str, $bomString, 0, '8BIT')) {
        // the string doesn't start with this BOM
        continue;
      }

      $rest = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($rest === false) ? '' : (string)$rest;
    }

    return $str;
  }
4051
4052
  /**
   * Removes duplicate occurrences of a string in another string.
   *
   * Every run of directly repeated "$what" is collapsed into one occurrence,
   * e.g.: remove_duplicates('a  b   c') => 'a b c'
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {

        // an empty item can't be duplicated
        if ($item === '') {
          continue;
        }

        // The replacement is the captured group (which is always exactly the item) and not the raw item,
        // because sequences like "$0" or "\0" in a raw item would be read as back-references by preg_replace().
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
4075
4076
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Copied from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * INFO: the url encoded forms are matched case-insensitively, so "%0b" and "%0B" are both removed.
   *
   * @param string $str         <p>The input string.</p>
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%00") of the characters.</p>
   * @param string $replacement [optional] <p>The replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    // repeat until nothing was replaced anymore, because a removal can glue a new sequence together (e.g. "%%0000")
    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4109
4110
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * With "$processInvalidUtf8" the string is converted from UTF-8 to UTF-8 via mbstring, so
   * that every invalid byte sequence is substituted by U+FFFD. Afterwards every U+FFFD
   * (already existing or just substituted) is replaced by "$replacementChar".
   *
   * INFO: "mb_substitute_character()" only accepts a code point or "none" / "long" / "entity",
   *       that's why the replacement string itself is never passed to it.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character (an empty string removes the chars).</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // the substitute character is a global mbstring setting, so it is restored after the conversion
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);
    }

    // "\xEF\xBF\xBD" is the UTF-8 byte sequence of U+FFFD
    return str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4155
4156
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * Without "$chars" (INF, '', null, false) every trailing character of the unicode
   * categories "Z" (separators) and "C" (control / other) is removed.
   *
   * INFO: the char-list "0" is a valid char-list, e.g. rtrim('700', '0') => '7'
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // "0" is falsy in PHP, so it must be excluded explicitly from the "no chars given" check
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass((string)$chars) . '+$/u', '', $str);
  }
4179
4180
  /**
   * Build a regex fragment that matches one of the given characters.
   *
   * The input is split via "UTF8::str_split()". A "-" is put at the front of the character class,
   * pieces with less than three bytes are quoted via preg_quote(), longer pieces with a length of one
   * character are appended as they are and all other pieces become alternatives next to the class.
   * The results are cached in a static array (key: "$s" . "$class").
   *
   * @param string $s     <p>The characters that should be matched.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>A character class like "[...]" or a group like "(?:[...]|...)".</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $fragmentCache = array();

    $cacheKey = $s . $class;

    if (isset($fragmentCache[$cacheKey])) {
      return $fragmentCache[$cacheKey];
    }

    // [0] => content of the character class, [1..n] => alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $piece) {
      if ('-' === $piece) {
        $parts[0] = '-' . $parts[0];
      } elseif (!isset($piece[2])) {
        $parts[0] .= preg_quote($piece, '/');
      } elseif (1 === self::strlen($piece)) {
        $parts[0] .= $piece;
      } else {
        $parts[] = $piece;
      }
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $fragment = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $fragmentCache[$cacheKey] = $fragment;

    return $fragment;
  }
4228
4229
  /**
   * WARNING: Prints the detected native UTF-8 support (libs) as HTML, e.g. for debugging.
   *
   * Each entry of the internal support table is echoed as "key - value" inside a "<pre>" block.
   */
  public static function showSupport()
  {
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8']) === false) {
      self::checkForSupport();
    }

    $rows = '';
    foreach (self::$SUPPORT as $feature => $state) {
      $rows .= $feature . ' - ' . print_r($state, true) . "\n<br>";
    }

    echo '<pre>' . $rows . '</pre>';
  }
4244
4245
  /**
   * Converts a UTF-8 character to an HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII chars unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encodings need to be normalized
    $ordEncoding = ($encoding !== 'UTF-8')
        ? self::normalize_encoding($encoding, 'UTF-8')
        : $encoding;

    return '&#' . self::ord($char, $ordEncoding) . ';';
  }
4276
4277
  /**
   * Convert a string to an array of Unicode characters.
   *
   * Uses PCRE when UTF-8 support is available, otherwise decodes the UTF-8 byte
   * sequences by hand. In the fallback, bytes that do not start a complete, well-formed
   * 1-4 byte sequence are skipped.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    $chars = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "s" lets the dot match newlines too, "u" makes it match whole code points
      preg_match_all('/./us', $str, $matches);
      if (isset($matches[0])) {
        $chars = $matches[0];
      }
      unset($matches);

    } else {

      // fallback: walk the bytes manually

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // byte length, even if strlen() is overloaded by mbstring
      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $byteCount = \mb_strlen($str, '8BIT');
      } else {
        $byteCount = strlen($str);
      }

      for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead = $str[$pos];

        // 0xxxxxxx: single-byte (ASCII) character
        if (($lead & "\x80") === "\x00") {
          $chars[] = $lead;
          continue;
        }

        // 110xxxxx: two-byte sequence
        if (isset($str[$pos + 1]) && ($lead & "\xE0") === "\xC0") {
          if (($str[$pos + 1] & "\xC0") === "\x80") {
            $chars[] = $lead . $str[$pos + 1];
            $pos++;
          }
          continue;
        }

        // 1110xxxx: three-byte sequence
        if (isset($str[$pos + 2]) && ($lead & "\xF0") === "\xE0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2];
            $pos += 2;
          }
          continue;
        }

        // 11110xxx: four-byte sequence
        if (isset($str[$pos + 3]) && ($lead & "\xF8") === "\xF0") {
          if (
              ($str[$pos + 1] & "\xC0") === "\x80"
              &&
              ($str[$pos + 2] & "\xC0") === "\x80"
              &&
              ($str[$pos + 3] & "\xC0") === "\x80"
          ) {
            $chars[] = $lead . $str[$pos + 1] . $str[$pos + 2] . $str[$pos + 3];
            $pos += 3;
          }
        }
      }
    }

    if ($length > 1) {
      // glue every "$length" characters back together
      $chunks = array();
      foreach (array_chunk($chars, $length) as $group) {
        $chunks[] = implode('', $group);
      }

      return $chunks;
    }

    if (isset($chars[0]) && $chars[0] === '') {
      return array();
    }

    return $chars;
  }
4400
4401
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: UTF-16 / UTF-32 (binary input only), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed candidate list, and finally an iconv round-trip
   * over the known iconv encodings.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // run each detection only once and reuse the result
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }
      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }
      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted when converting the input from that encoding to
    // itself leaves every byte untouched. The result is compared directly;
    // hashing both sides first adds work without adding information.

    $original = (string)$str;
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $original) === $original) {
        return $encodingTmp;
      }
    }

    return false;
  }
4501
4502
  /**
   * Check if the string ends with the given substring (case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // compare the tail of the haystack with the needle
    $tail = substr($haystack, -strlen($needle));

    return $tail === $needle;
  }
4525
4526
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // Cut the tail by characters, not by bytes: upper- and lower-case forms of
    // the same letter may differ in byte length, and a byte-based cut could
    // also split a multi-byte character.
    $tail = self::substr($haystack, -self::strlen($needle));

    return self::strcasecmp($tail, $needle) === 0;
  }
4549
4550
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * Every search term is turned into a quoted, case-insensitive UTF-8 regex; an empty
   * search term is turned into a pattern that never matches.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // Build the pattern list in a fresh array instead of rewriting "$search"
    // through a by-reference foreach (which leaves a dangling reference behind).
    $patterns = array();
    foreach ((array)$search as $term) {
      $term = (string)$term;

      if ('' === $term) {
        // can never match: start-of-string preceded by a character
        $patterns[] = '/^(?<=.)$/';
      } else {
        $patterns[] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // let preg_replace() write the number of replacements straight into "$count"
    return preg_replace($patterns, $replace, $subject, -1, $count);
  }
4593
4594
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a prefix is a match at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4617
4618
  /**
   * Limit the number of characters in a string without cutting the last word in half.
   *
   * If the string is longer than "$length", it is cut back to the last space within the
   * limit and "$strAddOn" is appended.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Maximum number of characters to keep.</p>
   * @param string $strAddOn <p>Text appended when the string was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls exactly behind a word
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    $truncated = (string)self::substr($str, 0, $length);

    // drop the (possibly incomplete) last word
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    if ($withoutLastWord === '') {
      // no space inside the limit: hard cut
      return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4658
4659
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged when "$pad_length" is not an integer, is not
   * positive, is smaller than the current length, or when "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    $ps_length = self::strlen($pad_string);

    // an empty pad string cannot fill anything (and would divide by zero below)
    if ($ps_length < 1) {
      return $str;
    }

    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $pre = (string)self::substr($pre, 0, $diff);
        $post = '';
        break;

      case STR_PAD_BOTH:
        // the left side gets the smaller half, the right side the rest;
        // note the parentheses: a bare "(int)$diff / 2" casts before dividing
        $left = (int)($diff / 2);
        $right = $diff - $left;

        $pre = str_repeat($pad_string, (int)ceil($left / $ps_length));
        $pre = (string)self::substr($pre, 0, $left);
        $post = str_repeat($pad_string, (int)ceil($right / $ps_length));
        $post = (string)self::substr($post, 0, $right);
        break;

      case STR_PAD_RIGHT:
      default:
        $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
        $post = (string)self::substr($post, 0, $diff);
        $pre = '';
    }

    return $pre . $str . $post;
  }
4713
4714
  /**
   * Repeat a string.
   *
   * The input is passed through UTF8::filter() before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    return str_repeat(self::filter($str), $multiplier);
  }
4738
4739
  /**
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string.
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4772
4773
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to look for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to work on.</p>
   *
   * @return string <p>The subject, unchanged if the search term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $firstMatch = self::strpos($subject, $search);

    if ($firstMatch === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $firstMatch, self::strlen($search));
  }
4792
4793
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    $chars = self::split($str);

    // shuffle() works in place on the character list
    shuffle($chars);

    return implode('', $chars);
  }
4808
4809
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice collapses duplicate values
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4834
4835
  /**
   * Split a string into an array of grapheme clusters.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of grapheme clusters per array element.</p>
   *
   * @return array <p>
   *               The chunks; an empty array for an empty string. For "$len" &lt; 1 the
   *               call is delegated to the native str_split().
   *               </p>
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    if ($len < 1) {
      // invalid length: delegate to the native function
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // join every "$len" clusters into one element
    $arrayOutput = array();
    foreach (array_chunk($clusters, $len) as $group) {
      $arrayOutput[] = implode('', $group);
    }

    return $arrayOutput;
  }
4879
4880
  /**
   * Check if the string starts with the given substring (case-sensitive).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned when either argument is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // a prefix is a match at position 0
    return strpos($haystack, $needle) === 0;
  }
4903
4904
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big-endian number and written in base 2
   * without leading zeros, e.g. "a" (0x61) becomes "1100001".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The bits as a string of "0" and "1"; "0" for an empty or all-zero input.</p>
   */
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '0';
    }

    // Convert byte by byte: base_convert() on the whole hex dump loses precision
    // as soon as the number no longer fits into a float.
    $bytes = unpack('C*', $str);
    if (!$bytes) {
      return '0';
    }

    $bits = '';
    foreach ($bytes as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // same shape as a base conversion: no leading zeros, but at least one digit
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4919
4920
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the words kept as delimiters, so without filtering the
   * result alternates between non-word text (even indexes) and words (odd indexes).
   *
   * @param string   $str               <p>The input string.</p>
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty (whitespace-only) values.</p>
   * @param null|int $removeShortValues <p>Remove values that are not longer than this many characters.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    // letters plus the caller's extra chars make up a word
    $wordChars = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $tooShort = $removeShortValues !== null && self::strlen($part) <= $removeShortValues;
      if ($tooShort) {
        continue;
      }

      $isEmpty = $removeEmptyValues === true && trim($part) === '';
      if ($isEmpty) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4981
4982
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown <p>Passed through to UTF8::to_ascii().</p>
   * @param bool   $strict  <p>Passed through to UTF8::to_ascii().</p>
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4997
4998
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string, or the words themselves (see "$format").</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // parts alternate: [non-word, word, non-word, word, ..., non-word]
    $parts = self::str_to_words($str, $charlist);
    $partCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[] = $parts[$idx];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      // the first word starts behind the leading non-word part
      $offset = self::strlen($parts[0]);
      for ($idx = 1; $idx < $partCount; $idx += 2) {
        $words[$offset] = $parts[$idx];
        $offset += self::strlen($parts[$idx]) + self::strlen($parts[$idx + 1]);
      }

      return $words;
    }

    return ($partCount - 1) / 2;
  }
5041
5042
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp(); both strings are case-folded first.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5060
5061
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $part = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
5078
5079
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are not byte-identical are compared in Unicode normalization form D.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings need no normalization
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    return strcmp(
        \Normalizer::normalize($str1, \Normalizer::NFD),
        \Normalizer::normalize($str2, \Normalizer::NFD)
    );
  }
5099
5100
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the segment.</p>
   * @param int    $offset   <p>Optional start position within "$str".</p>
   * @param int    $length   <p>Optional length of the part of "$str" to examine.</p>
   *
   * @return int|null <p>
   *                  The number of leading characters that are not in "$charList";
   *                  null if the char list or the examined string is empty.
   *                  </p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = (string)$charList;

    if ($charList === '') {
      return null;
    }

    // narrow the string down to the requested window first
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      if ($window === false) {
        return null;
      }
      $str = (string)$window;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first char from the list
    if (preg_match('/^(.*?)' . self::rxClass($charList) . '/us', $str, $match)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($match[1]);
    }

    // no char from the list found: the whole string is the segment
    return self::strlen($str);
  }
5136
5137
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $part = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $part;
  }
5154
5155
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $result = '';

    // convert every code point with UTF8::chr(), in array order
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5177
5178
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // the BOM byte sequences are the keys of the BOM table
    foreach (array_keys(self::$BOM) as $bom) {
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5195
5196
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped; handed to the native strip_tags().
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Run UTF8::clean() on the string before stripping.</p>
   *
   * @return string <p>The stripped string, or an empty string for empty input.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $subject = $cleanUtf8 === true ? self::clean($str) : $str;

    return strip_tags($subject, $allowable_tags);
  }
5230
5231
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (also false when haystack or needle is empty).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding !== 'UTF-8' && $encoding !== true && $encoding !== false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5291
5292
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also false when haystack or needle is empty on input).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if cleaning removed all of it.
    // Compare against '' explicitly: a loose "!$needle" would also treat the
    // perfectly valid needle "0" as empty and return the whole haystack.
    if ($needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    if (self::is_ascii($needle) && self::is_ascii($haystack)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the first match
    $match = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    return self::substr($haystack, self::strlen($match[1]));
  }
5373
5374
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding !== 'UTF-8' && $encoding !== true && $encoding !== false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // these encodings are measured in bytes
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
      case '8BIT':
        $plainStrlenIsSafe = $encoding === 'CP850'
                             && self::$SUPPORT['mbstring_func_overload'] === false;
        if ($plainStrlenIsSafe) {
          return strlen($str);
        }

        return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // preferred backend
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    // second choice (this also covers "non UTF-8 encoding without mbstring")
    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the characters matched by the regex engine
    $parts = array();
    preg_match_all('/./us', $str, $parts);
    $charCount = count($parts[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5481
5482
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared with UTF8::strnatcmp().
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5498
5499
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    $folded1 = self::strtonatfold($str1);
    $folded2 = self::strtonatfold($str2);

    return strnatcmp($folded1, $folded2);
  }
5517
5518
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are passed through UTF8::strtocasefold() and then compared with UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5535
5536
  /**
   * String comparison of the first n characters.
   *
   * Each string is cut to its first $len characters via UTF8::substr(),
   * the two prefixes are then compared with UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    $prefix1 = (string)self::substr($str1, 0, $len);
    $prefix2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($prefix1, $prefix2);
  }
5556
5557
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also false when haystack or char_list is empty).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    $m = array();
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $m)) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    $bytePos = strpos($haystack, $m[0]);

    return substr($haystack, $bytePos);
  }
5582
5583
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: convert a code point to
    // its character. This has to happen before the string cast below, afterwards
    // the original type is no longer visible.
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if neither backend that understands other encodings is available
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // a negative offset counts from the end of the haystack: translate it into
      // an absolute start position so that the returned index stays relative to
      // the beginning of the original haystack
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystack = (string)$haystackTmp;

    // byte position inside the remaining haystack ...
    $pos = strpos($haystack, $needle);
    if ($pos === false) {
      return false;
    }

    // ... converted into a character position and shifted by the skipped characters
    return $offset + self::strlen(substr($haystack, 0, $pos));
  }
5728
5729
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $isUtf8 = $encoding === 'UTF-8';
    if (!$isUtf8) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5768
5769
  /**
   * Reverses characters order in the string.
   *
   * The string is broken up with UTF8::split() and the pieces are joined in reverse order.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);
    $reversed = array_reverse($chars);

    return implode('', $reversed);
  }
5786
5787
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end.
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $isUtf8 = $encoding === 'UTF-8';
    if (!$isUtf8) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5825
5826
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // an integer needle is handled as a code point; this must be tested
    // before the string cast below
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check of $encoding is only a fallback for old versions
    $mustClean = $cleanUtf8 === true || $encoding === true;
    if ($mustClean) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($encoding !== 'UTF-8' && $encoding !== true && $encoding !== false) {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } else {
      $encoding = 'UTF-8';
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               && self::$SUPPORT['intl'] === true
               && Bootup::is_php('5.4') === true;
    if ($useIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides and search case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5906
5907
  /**
5908
   * Find position of last occurrence of a string in a string.
5909
   *
5910
   * @link http://php.net/manual/en/function.mb-strrpos.php
5911
   *
5912
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
5913
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
5914
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
5915
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
5916
   *                              the end of the string.
5917
   *                              </p>
5918
   * @param string     $encoding  [optional] <p>Set the charset.</p>
5919
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
5920
   *
5921
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br>If needle
5922
   *                   is not found, it returns false.</p>
5923
   */
5924 10
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // A non-negative integer needle is interpreted as a code point and turned into its character.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // An empty haystack or an empty needle can never produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // \mb_strrpos && iconv_strrpos are not tolerant to invalid characters.
    // INFO: the "bool"-check on $encoding is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: a boolean $encoding is only accepted as a fallback for old versions
    $encoding = ($encoding === 'UTF-8' || is_bool($encoding))
        ? 'UTF-8'
        : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $mbstring === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if ($mbstring === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // 2nd choice: intl -- INFO: "grapheme_strrpos()" is only used for UTF-8 input
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // Cut the haystack down to the searched window; a "false" from substr() becomes '' through the cast.
    if ($offset > 0) {
      $haystack = (string)self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = (string)self::substr($haystack, 0, $offset);
      // the window now starts at the beginning, so nothing has to be added to the result
      $offset = 0;
    }

    $bytePos = strrpos($haystack, $needle);

    // Translate the byte position into a character position by measuring the text in front of the match.
    return $bytePos === false
        ? false
        : $offset + self::strlen(substr($haystack, 0, $bytePos));
  }
6009
6010
  /**
6011
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
6012
   * mask.
6013
   *
6014
   * @param string $str    <p>The input string.</p>
6015
   * @param string $mask   <p>The mask of chars</p>
6016
   * @param int    $offset [optional]
6017
   * @param int    $length [optional]
6018
   *
6019
   * @return int
6020
   */
6021 10
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // Restrict the subject to the requested window first; a "false" from substr() becomes '' through the cast.
    if ($offset || $length !== null) {
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // Match the longest run of mask characters anchored at the start of the string.
    $found = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
      return 0;
    }

    return self::strlen($found[0]);
  }
6038
6039
  /**
6040
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
6041
   *
6042
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
6043
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
6044
   * @param bool    $before_needle [optional] <p>
6045
   *                               If <b>TRUE</b>, strstr() returns the part of the
6046
   *                               haystack before the first occurrence of the needle (excluding the needle).
6047
   *                               </p>
6048
   * @param string  $encoding      [optional] <p>Set the charset.</p>
6049
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
6050
   *
6051
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found.
6052
   */
6053 2
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle can never produce a match.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would otherwise distort the result
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $mbstring === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // 1st choice: mbstring
    if ($mbstring === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // 2nd choice: intl -- INFO: "grapheme_strstr()" is only used for UTF-8 input
    if (
        $encoding === 'UTF-8'
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback via regex: lazily capture everything in front of the first occurrence of the needle
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    // Either hand back the captured prefix, or skip over it and return the rest.
    return $before_needle
        ? $found[1]
        : self::substr($haystack, self::strlen($found[1]));
  }
6111
6112
  /**
6113
   * Unicode transformation for case-less matching.
6114
   *
6115
   * @link http://unicode.org/reports/tr21/tr21-5.html
6116
   *
6117
   * @param string  $str       <p>The input string.</p>
6118
   * @param bool    $full      [optional] <p>
6119
   *                           <b>true</b>, replace full case folding chars (default)<br>
6120
   *                           <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
6121
   *                           </p>
6122
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6123
   *
6124
   * @return string
6125
   */
6126 13
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // Per-request caches: the search/replace lists are built only once.
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldMap = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    // 1. the small, built-in folding table
    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // 2. optionally the full folding table, loaded on first use
    if ($full) {
      if ($fullFoldMap === null) {
        $fullFoldMap = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldMap[0], $fullFoldMap[1], $str);
    }

    // 3. optional clean-up, done after the replacements
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // 4. finally lower-case whatever is left
    return self::strtolower($str);
  }
6163
6164
  /**
6165
   * Make a string lowercase.
6166
   *
6167
   * @link http://php.net/manual/en/function.mb-strtolower.php
6168
   *
6169
   * @param string      $str       <p>The string being lowercased.</p>
6170
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6171
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6172
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6173
   *
6174
   * @return string str with all alphabetic characters converted to lowercase.
6175
   */
6176 25 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Common path: no language-specific rules requested.
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Language-specific lower-casing needs the intl transliterator; without it we warn and use mbstring.
    if (
        self::$SUPPORT['intl'] !== true
        ||
        Bootup::is_php('5.4') !== true
    ) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    if (!in_array($transliteratorId, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // unknown language: use the generic transliterator instead
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6221
6222
  /**
6223
   * Generic case sensitive transformation for collation matching.
6224
   *
6225
   * @param string $str <p>The input string</p>
6226
   *
6227
   * @return string
6228
   */
6229 3
  private static function strtonatfold($str)
  {
    // Decompose the characters (NFD) so that combining marks become separate code points ...
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // ... and then drop every run of non-spacing marks (\p{Mn}).
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6234
6235
  /**
6236
   * Make a string uppercase.
6237
   *
6238
   * @link http://php.net/manual/en/function.mb-strtoupper.php
6239
   *
6240
   * @param string      $str       <p>The string being uppercased.</p>
6241
   * @param string      $encoding  [optional] <p>Set the charset.</p>
6242
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6243
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6244
   *
6245
   * @return string str with all alphabetic characters converted to uppercase.
6246
   */
6247 19 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences before the string is converted
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // Language-specific upper-casing (e.g. az, el, lt, tr) is delegated to the intl transliterator.
    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // unknown language: use the generic transliterator instead
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // The warning must name this method (it previously reported "UTF8::strtolower()").
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    // Default path, also used after the warning above.
    return \mb_strtoupper($str, $encoding);
  }
6291
6292
  /**
6293
   * Translate characters or replace sub-strings.
6294
   *
6295
   * @link  http://php.net/manual/en/function.strtr.php
6296
   *
6297
   * @param string          $str  <p>The string being translated.</p>
6298
   * @param string|string[] $from <p>The string replacing from.</p>
6299
   * @param string|string[] $to   <p>The string being translated to.</p>
6300
   *
6301
   * @return string <p>
6302
   *                This function returns a copy of str, translating all occurrences of each character in from to the
6303
   *                corresponding character in to.
6304
   *                </p>
6305
   */
6306 1
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Translating something into itself is a no-op.
    if ($from === $to) {
      return $str;
    }

    // Three-argument form: build a "from => to" map.
    if (INF !== $to) {
      // Only strings are split into characters; an array is already a list of pieces
      // and must not be passed to str_split(), which expects a string.
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      $countFrom = count($from);
      $countTo = count($to);

      // Nothing to map; returning here also keeps array_combine() away from empty arrays.
      if ($countFrom === 0 || $countTo === 0) {
        return $str;
      }

      // array_combine() needs lists of equal size, so the longer one is truncated.
      if ($countFrom > $countTo) {
        $from = array_slice($from, 0, $countTo);
      } elseif ($countFrom < $countTo) {
        $to = array_slice($to, 0, $countFrom);
      }

      $from = array_combine($from, $to);
    }

    // Two-argument form with a plain string: remove every occurrence of it.
    if (is_string($from)) {
      return str_replace($from, '', $str);
    }

    // Two-argument form with a map (given by the caller or built above).
    return strtr($str, $from);
  }
6339
6340
  /**
6341
   * Return the width of a string.
6342
   *
6343
   * @param string  $str       <p>The input string.</p>
6344
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6345
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6346
   *
6347
   * @return int
6348
   */
6349 1
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding,
      // so broken byte sequences are removed up front
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // always delegated to the "mb_"-function (possibly provided via polyfill)
    return \mb_strwidth($str, $encoding);
  }
6364
6365
  /**
6366
   * Changes all keys in an array.
6367
   *
6368
   * @param array $array <p>The array to work on</p>
6369
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
6370
   *                  or <strong>CASE_LOWER</strong> (default)</p>
6371
   *
6372
   * @return array|false <p>An array with its keys lower or uppercased, or false if
6373
   *                     input is not an array.</p>
6374
   */
6375 1
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // Only CASE_LOWER lower-cases; CASE_UPPER and every unknown value upper-case the keys.
    $lowerCase = ($case === CASE_LOWER);

    $result = array();
    foreach ($array as $key => $value) {
      $convertedKey = $lowerCase ? self::strtolower($key) : self::strtoupper($key);

      // keys that collide after the conversion are overwritten by the later entry
      $result[$convertedKey] = $value;
    }

    return $result;
  }
6402
6403
  /**
6404
   * Get part of a string.
6405
   *
6406
   * @link http://php.net/manual/en/function.mb-substr.php
6407
   *
6408
   * @param string  $str       <p>The string being checked.</p>
6409
   * @param int     $offset    <p>The first position used in str.</p>
6410
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
6411
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6412
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6413
   *
6414
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6415
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6416
   *                      characters long, <b>FALSE</b> will be returned.</p>
6417
   */
6418 76
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Empty string
    if ($length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // Whole string
    if (!$offset && $length === null) {
      return $str;
    }

    // The character count is only needed for the offset check and as the default length.
    $str_length = 0;
    if ($offset || $length === null) {
      $str_length = (int)self::strlen($str, $encoding);
    }

    // Impossible: the start lies behind the end of the string
    if ($offset && $offset > $str_length) {
      return false;
    }

    // From here on $length is always an integer, so no later branch has to handle "null".
    if ($length === null) {
      $length = $str_length;
    } else {
      $length = (int)$length;
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is cut with the native substr() as long as mbstring does not overload it
    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return substr($str, $offset, $length);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_substr()" is only used for UTF-8 input
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_substr($str, $offset, $length);
    }

    if (
        $length >= 0 // "iconv_substr()" can't handle negative length
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_substr($str, $offset, $length);
    }

    // pure ASCII: bytes and characters are the same thing
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php

    // split to array, and remove invalid characters
    $array = self::split($str);

    // extract relevant part, and join to make a string again
    return implode('', array_slice($array, $offset, $length));
  }
6525
6526
  /**
6527
   * Binary safe comparison of two strings from an offset, up to length characters.
6528
   *
6529
   * @param string  $str1               <p>The main string being compared.</p>
6530
   * @param string  $str2               <p>The secondary string being compared.</p>
6531
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
6532
   *                                    counting from the end of the string.</p>
6533
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
6534
   *                                    the length of the str compared to the length of main_str less the offset.</p>
6535
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6536
   *                                    insensitive.</p>
6537
   *
6538
   * @return int <p>
6539
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
6540
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
6541
   *             <strong>0</strong> if they are equal.
6542
   *             </p>
6543
   */
6544 1
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $windowRequested = ($offset !== 0 || $length !== null);

    if ($windowRequested) {
      // Cut the main string to the requested window; a "false" from substr() becomes '' through the cast.
      $str1 = (string)self::substr($str1, $offset, $length);

      // The second string is limited to as many characters as are left of the first one.
      $str2 = (string)self::substr($str2, 0, self::strlen($str1));
    }

    return $case_insensitivity === true
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6570
6571
  /**
6572
   * Count the number of substring occurrences.
6573
   *
6574
   * @link  http://php.net/manual/en/function.substr-count.php
6575
   *
6576
   * @param string  $haystack  <p>The string to search in.</p>
6577
   * @param string  $needle    <p>The substring to search for.</p>
6578
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
6579
   * @param int     $length    [optional] <p>
6580
   *                           The maximum length after the specified offset to search for the
6581
   *                           substring. It outputs a warning if the offset plus the length is
6582
   *                           greater than the haystack length.
6583
   *                           </p>
6584
   * @param string  $encoding  [optional] <p>Set the charset.</p>
6585
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6586
   *
6587
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
6588
   */
6589 1
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Without a haystack or a needle there is nothing to count.
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // Restrict the haystack to the requested window, if any.
    if ($offset || $length !== null) {

      // a missing length means "as many characters as the haystack has"
      $length = (int)($length === null ? self::strlen($haystack) : $length);
      $offset = (int)$offset;

      if (
          $length !== 0
          &&
          $offset !== 0
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // a "false" from substr() becomes '' through the cast
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // remove invalid byte sequences from both strings before counting
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $mbstring = self::$SUPPORT['mbstring'];

    if ($encoding !== 'UTF-8' && $mbstring === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if ($mbstring === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // fallback via regex: one entry per occurrence of the quoted needle
    $occurrences = array();
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $occurrences, PREG_SET_ORDER);

    return count($occurrences);
  }
6660
6661
  /**
6662
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
6663
   *
6664
   * @param string $haystack <p>The string to search in.</p>
6665
   * @param string $needle   <p>The substring to search for.</p>
6666
   *
6667
   * @return string <p>Return the sub-string.</p>
6668
   */
6669 1 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // Nothing to strip: no needle given, or the haystack does not start with it (ignoring case).
    if ($needle === '' || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle has; a "false" from substr() becomes ''.
    return (string)self::substr($haystack, self::strlen($needle));
  }
6693
6694
  /**
6695
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
6696
   *
6697
   * @param string $haystack <p>The string to search in.</p>
6698
   * @param string $needle   <p>The substring to search for.</p>
6699
   *
6700
   * @return string <p>Return the sub-string.</p>
6701
   */
6702 1 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // Nothing to strip: no needle given, or the haystack does not end with it (ignoring case).
    if ($needle === '' || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything except the last strlen($needle) characters; a "false" from substr() becomes ''.
    $keepLength = self::strlen($haystack) - self::strlen($needle);

    return (string)self::substr($haystack, 0, $keepLength);
  }
6726
6727
  /**
6728
   * Removes a prefix ($needle) from the start of the string ($haystack).
6729
   *
6730
   * @param string $haystack <p>The string to search in.</p>
6731
   * @param string $needle   <p>The substring to search for.</p>
6732
   *
6733
   * @return string <p>Return the sub-string.</p>
6734
   */
6735 1 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // Nothing to strip: no needle given, or the haystack does not start with it.
    if ($needle === '' || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle has; a "false" from substr() becomes ''.
    return (string)self::substr($haystack, self::strlen($needle));
  }
6759
6760
  /**
   * Replace text within a portion of a string.
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
   * @param int|int[]       $offset           <p>
   *                                          If start is positive, the replacing will begin at the start'th offset
   *                                          into string.
   *                                          <br><br>
   *                                          If start is negative, the replacing will begin at the start'th character
   *                                          from the end of string.
   *                                          </p>
   * @param int|int[]|null  $length           [optional] <p>If given and is positive, it represents the length of the
   *                                          portion of string which is to be replaced. If it is negative, it
   *                                          represents the number of characters from the end of string at which to
   *                                          stop replacing. If it is not given for a single string, the replacing
   *                                          ends at the end of the string. If it is not given for an array of
   *                                          strings, a length of zero is used for every element, i.e. the
   *                                          replacement is inserted at the given offset.</p>
   *
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str) === true) {
      $count = count($str);

      // the replacement: one entry per input string
      $replacement = is_array($replacement) === true
          ? array_slice($replacement, 0, $count)
          : array_pad(array($replacement), $count, $replacement);

      // the offset: anything that is not a real integer falls back to 0
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $count);
        foreach ($offset as $offsetKey => $offsetValue) {
          if ((int)$offsetValue !== $offsetValue) {
            $offset[$offsetKey] = 0;
          }
        }
      } else {
        $offset = array_pad(array($offset), $count, $offset);
      }

      // the length: missing entries become 0, non-integers become the number of input strings
      if (!isset($length)) {
        $length = array_fill(0, $count, 0);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $count);
        foreach ($length as $lengthKey => $lengthValue) {
          if (!isset($lengthValue)) {
            $length[$lengthKey] = 0;
          } elseif ((int)$lengthValue !== $lengthValue) {
            $length[$lengthKey] = $count;
          }
        }
      } else {
        $length = array_pad(array($length), $count, $length);
      }

      // recursive call, element by element
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single string only takes the first replacement of an array
    if (is_array($replacement) === true) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // for pure ASCII input, byte offsets equal character offsets, so the native function is safe
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings into characters and splice on the character arrays
    preg_match_all('/./us', $str, $strChars);
    preg_match_all('/./us', $replacement, $replacementChars);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($strChars[0], $offset, $length, $replacementChars[0]);

    return implode('', $strChars[0]);
  }
6861
6862
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to strip.</p>
   *
   * @return string <p>The haystack without the suffix; the unchanged haystack if it does not end with the
   *                needle (or the needle is empty); an empty string for an empty haystack.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing to strip from an empty haystack
    if ($haystack === '') {
      return '';
    }

    // an empty needle, or a needle that is not a suffix, leaves the input untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix, measured in characters
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remaining = self::substr($haystack, 0, $keepLength);

    return $remaining === false ? '' : (string)$remaining;
  }
6893
6894
  /**
   * Returns a case swapped version of the string.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the class is referenced by name inside the closure (no "self::" there)
    $swapCharacter = function ($match) use ($encoding) {
      $character = $match[0];
      $upper = UTF8::strtoupper($character, $encoding);

      // unchanged by upper-casing => it already was upper case (or has no case): lower it instead
      return $character === $upper ? UTF8::strtolower($character, $encoding) : $upper;
    };

    // visit every non-whitespace character
    return preg_replace_callback('/[\S]/u', $swapCharacter, $str);
  }
6937
6938
  /**
   * Alias for "UTF8::to_ascii()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Passed on as the "$unknown" argument.</p>
   * @param bool   $strict    <p>Passed on as the "$strict" argument.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6955
6956
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6971
6972
  /**
   * Alias for "UTF8::to_latin1()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6987
6988
  /**
   * Alias for "UTF8::to_utf8()"; the argument is forwarded unchanged (no html-entity decoding).
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
7003
7004
  /**
   * Convert a string into ASCII.
   *
   * The string is cleaned first; every remaining non-ASCII character is then looked up in a
   * transliteration table (loaded lazily via "self::getData()" and cached for the rest of the request).
   * Characters without a table entry are replaced by $unknown.
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the transliteration tables, indexed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        // the transliterator reports failure with a non-string result: keep the input in that case
        // and let the table based fallback below handle it
        $strTmp = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if (is_string($strTmp) === true) {
          $str = $strTmp;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }

      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // a continuation byte (128-191) or 254/255 can never start a sequence
      if ($ordC0 < 192 || $ordC0 > 253) {
        $c = $unknown;
        continue;
      }

      // decode the code point; "$ord" is assigned on every path below, so a value from a
      // previous character can never be reused
      $ordC1 = ord($c[1]);

      if ($ordC0 <= 223) {
        // 2 bytes
        $ord = ($ordC0 - 192) * 64 + ($ordC1 - 128);
      } elseif ($ordC0 <= 239) {
        // 3 bytes
        $ordC2 = ord($c[2]);
        $ord = ($ordC0 - 224) * 4096 + ($ordC1 - 128) * 64 + ($ordC2 - 128);
      } elseif ($ordC0 <= 247) {
        // 4 bytes
        $ordC2 = ord($c[2]);
        $ordC3 = ord($c[3]);
        $ord = ($ordC0 - 240) * 262144 + ($ordC1 - 128) * 4096 + ($ordC2 - 128) * 64 + ($ordC3 - 128);
      } elseif ($ordC0 <= 251) {
        // 5 bytes (legacy form)
        $ordC2 = ord($c[2]);
        $ordC3 = ord($c[3]);
        $ordC4 = ord($c[4]);
        $ord = ($ordC0 - 248) * 16777216 + ($ordC1 - 128) * 262144 + ($ordC2 - 128) * 4096 + ($ordC3 - 128) * 64 + ($ordC4 - 128);
      } else {
        // 6 bytes (legacy form, lead byte 252 or 253)
        $ordC2 = ord($c[2]);
        $ordC3 = ord($c[3]);
        $ordC4 = ord($c[4]);
        $ordC5 = ord($c[5]);
        $ord = ($ordC0 - 252) * 1073741824 + ($ordC1 - 128) * 16777216 + ($ordC2 - 128) * 262144 + ($ordC3 - 128) * 4096 + ($ordC4 - 128) * 64 + ($ordC5 - 128);
      }

      // the high bits select the table ("bank"), the low 8 bits the entry inside of it
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          // remember the miss, so the lookup is not repeated for this bank
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {

        // keep for debugging
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "ascii: " . $UTF8_TO_ASCII[$bank][$newchar] . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {

        // keep for debugging missing chars
        /*
        echo "file: " . sprintf('x%02x', $bank) . "\n";
        echo "char: " . $c . "\n";
        echo "ord: " . $ord . "\n";
        echo "newchar: " . $newchar . "\n";
        echo "bank:" . $bank . "\n\n";
        */

        $c = $unknown;
      }
    }
    // break the reference, so "$c" can not alias the last array element any longer
    unset($c);

    return implode('', $chars);
  }
7164
7165
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keys are kept.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    // nothing to decode in an empty string
    return $str === '' ? '' : self::utf8_decode($str);
  }
7194
7195
  /**
   * Alias for "UTF8::to_iso8859()"; the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7208
7209
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>It decodes unicode escape sequences ("\uXXXX") and, on request, html-entities.</li>
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array.</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively), keys are kept
    if (is_array($str) === true) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // the byte length is needed, even if "strlen()" is overloaded by mbstring
    $max = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $buf = '';
    $i = 0;

    while ($i < $max) {
      $c1 = $str[$i];
      $lead = ord($c1);

      // how many continuation bytes does the lead byte announce? (0 = it is no multi-byte lead)
      if ($lead >= 0xC0 && $lead <= 0xDF) {
        $expected = 1;
      } elseif ($lead >= 0xE0 && $lead <= 0xEF) {
        $expected = 2;
      } elseif ($lead >= 0xF0 && $lead <= 0xF7) {
        $expected = 3;
      } else {
        $expected = 0;
      }

      if ($expected > 0) {

        // collect the sequence, as long as every following byte is a continuation byte (0x80 - 0xBF);
        // bytes behind the end of the string count as "\x00" and therefore never match
        $sequence = $c1;
        for ($k = 1; $k <= $expected; $k++) {
          $next = $i + $k < $max ? ord($str[$i + $k]) : 0;
          if ($next < 0x80 || $next > 0xBF) {
            $sequence = null;
            break;
          }
          $sequence .= $str[$i + $k];
        }

        if ($sequence !== null) { // yeah, almost sure it's UTF8 already
          $buf .= $sequence;
          $i += $expected;
        } else { // not valid UTF8 - convert the lead byte only
          $buf .= self::to_utf8_convert($c1);
        }

      } elseif ($lead >= 0x80) { // lone continuation byte (0x80 - 0xBF) or 0xF8 - 0xFF: needs conversion

        $buf .= self::to_utf8_convert($c1);

      } else { // 7-bit byte: it doesn't need conversion

        $buf .= $c1;

      }

      $i++;
    }

    // decode unicode escape sequences
    $buf = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $buf
    );

    // decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $buf = self::html_entity_decode($buf);
    }

    return $buf;
  }
7326
7327
  /**
   * Converts one non-UTF-8 byte into its UTF-8 representation.
   *
   * Bytes listed in the Windows-1252 special-case table are mapped through that table, every other
   * byte is expanded into a two byte sequence.
   *
   * @param string $int <p>A single raw byte (despite the name this is a one byte string, not an integer).</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int)
  {
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return (string)self::$WIN1252_TO_UTF8[$byteValue];
    }

    // bitwise operations on strings: build the lead byte and the continuation byte
    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $trailByte = ($int & "\x3F") | "\x80";

    return $leadByte . $trailByte;
  }
7347
7348
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * INFO: This is slower than "trim()"
   *
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
   * but the check for ASCII (7-Bit) costs more time than we can save here.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // an explicit, non-empty character list is handled by the one-sided helpers
    $hasCustomChars = $chars !== INF && $chars;
    if ($hasCustomChars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // default: strip the unicode categories "separator" (Z) and "other" (C) from both ends
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7376
7377
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      // clean first, so that the character positions used below are reliable
      $str = self::clean($str);
    }

    // everything behind the first character stays as it is
    $remainder = self::substr($str, 1, null, $encoding);
    if ($remainder === false) {
      $remainder = '';
    }

    // only the first character is upper-cased
    $firstChar = (string)self::substr($str, 0, 1, $encoding);

    return self::strtoupper($firstChar, $encoding, $cleanUtf8) . $remainder;
  }
7407
7408
  /**
   * Alias for "UTF8::ucfirst()"; all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7423
7424
  /**
   * Uppercase for all words in the string.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // compare against the empty string: a plain truthiness check would also discard the string "0"
    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the native function is only usable without a charlist and without exceptions
    // (again compared against '', so that a charlist / exception "0" is honoured)
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $newWords = array();
    $useExceptions = count($exceptions) > 0;

    foreach ($words as $word) {

      // skip empty parts only; a word like "0" must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7492
7493
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes as hexadecimal html-entities, they get decoded in the loop below
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    // "ENT_HTML5" is only touched when "Bootup::is_php('5.4')" reports support for it
    $flags = ENT_QUOTES;
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    // decode until the string is stable (or only once, if multi-decoding is disabled)
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
7543
7544
  /**
7545
   * Return a array with "urlencoded"-win1252 -> UTF-8
7546
   *
7547
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
7548
   *
7549
   * @return array
7550
   */
7551
  public static function urldecode_fix_win1252_chars()
7552
  {
7553
    return array(
7554
        '%20' => ' ',
7555
        '%21' => '!',
7556
        '%22' => '"',
7557
        '%23' => '#',
7558
        '%24' => '$',
7559
        '%25' => '%',
7560
        '%26' => '&',
7561
        '%27' => "'",
7562
        '%28' => '(',
7563
        '%29' => ')',
7564
        '%2A' => '*',
7565
        '%2B' => '+',
7566
        '%2C' => ',',
7567
        '%2D' => '-',
7568
        '%2E' => '.',
7569
        '%2F' => '/',
7570
        '%30' => '0',
7571
        '%31' => '1',
7572
        '%32' => '2',
7573
        '%33' => '3',
7574
        '%34' => '4',
7575
        '%35' => '5',
7576
        '%36' => '6',
7577
        '%37' => '7',
7578
        '%38' => '8',
7579
        '%39' => '9',
7580
        '%3A' => ':',
7581
        '%3B' => ';',
7582
        '%3C' => '<',
7583
        '%3D' => '=',
7584
        '%3E' => '>',
7585
        '%3F' => '?',
7586
        '%40' => '@',
7587
        '%41' => 'A',
7588
        '%42' => 'B',
7589
        '%43' => 'C',
7590
        '%44' => 'D',
7591
        '%45' => 'E',
7592
        '%46' => 'F',
7593
        '%47' => 'G',
7594
        '%48' => 'H',
7595
        '%49' => 'I',
7596
        '%4A' => 'J',
7597
        '%4B' => 'K',
7598
        '%4C' => 'L',
7599
        '%4D' => 'M',
7600
        '%4E' => 'N',
7601
        '%4F' => 'O',
7602
        '%50' => 'P',
7603
        '%51' => 'Q',
7604
        '%52' => 'R',
7605
        '%53' => 'S',
7606
        '%54' => 'T',
7607
        '%55' => 'U',
7608
        '%56' => 'V',
7609
        '%57' => 'W',
7610
        '%58' => 'X',
7611
        '%59' => 'Y',
7612
        '%5A' => 'Z',
7613
        '%5B' => '[',
7614
        '%5C' => '\\',
7615
        '%5D' => ']',
7616
        '%5E' => '^',
7617
        '%5F' => '_',
7618
        '%60' => '`',
7619
        '%61' => 'a',
7620
        '%62' => 'b',
7621
        '%63' => 'c',
7622
        '%64' => 'd',
7623
        '%65' => 'e',
7624
        '%66' => 'f',
7625
        '%67' => 'g',
7626
        '%68' => 'h',
7627
        '%69' => 'i',
7628
        '%6A' => 'j',
7629
        '%6B' => 'k',
7630
        '%6C' => 'l',
7631
        '%6D' => 'm',
7632
        '%6E' => 'n',
7633
        '%6F' => 'o',
7634
        '%70' => 'p',
7635
        '%71' => 'q',
7636
        '%72' => 'r',
7637
        '%73' => 's',
7638
        '%74' => 't',
7639
        '%75' => 'u',
7640
        '%76' => 'v',
7641
        '%77' => 'w',
7642
        '%78' => 'x',
7643
        '%79' => 'y',
7644
        '%7A' => 'z',
7645
        '%7B' => '{',
7646
        '%7C' => '|',
7647
        '%7D' => '}',
7648
        '%7E' => '~',
7649
        '%7F' => '',
7650
        '%80' => '`',
7651
        '%81' => '',
7652
        '%82' => '‚',
7653
        '%83' => 'ƒ',
7654
        '%84' => '„',
7655
        '%85' => '…',
7656
        '%86' => '†',
7657
        '%87' => '‡',
7658
        '%88' => 'ˆ',
7659
        '%89' => '‰',
7660
        '%8A' => 'Š',
7661
        '%8B' => '‹',
7662
        '%8C' => 'Œ',
7663
        '%8D' => '',
7664
        '%8E' => 'Ž',
7665
        '%8F' => '',
7666
        '%90' => '',
7667
        '%91' => '‘',
7668
        '%92' => '’',
7669
        '%93' => '“',
7670
        '%94' => '”',
7671
        '%95' => '•',
7672
        '%96' => '–',
7673
        '%97' => '—',
7674
        '%98' => '˜',
7675
        '%99' => '™',
7676
        '%9A' => 'š',
7677
        '%9B' => '›',
7678
        '%9C' => 'œ',
7679
        '%9D' => '',
7680
        '%9E' => 'ž',
7681
        '%9F' => 'Ÿ',
7682
        '%A0' => '',
7683
        '%A1' => '¡',
7684
        '%A2' => '¢',
7685
        '%A3' => '£',
7686
        '%A4' => '¤',
7687
        '%A5' => '¥',
7688
        '%A6' => '¦',
7689
        '%A7' => '§',
7690
        '%A8' => '¨',
7691
        '%A9' => '©',
7692
        '%AA' => 'ª',
7693
        '%AB' => '«',
7694
        '%AC' => '¬',
7695
        '%AD' => '',
7696
        '%AE' => '®',
7697
        '%AF' => '¯',
7698
        '%B0' => '°',
7699
        '%B1' => '±',
7700
        '%B2' => '²',
7701
        '%B3' => '³',
7702
        '%B4' => '´',
7703
        '%B5' => 'µ',
7704
        '%B6' => '¶',
7705
        '%B7' => '·',
7706
        '%B8' => '¸',
7707
        '%B9' => '¹',
7708
        '%BA' => 'º',
7709
        '%BB' => '»',
7710
        '%BC' => '¼',
7711
        '%BD' => '½',
7712
        '%BE' => '¾',
7713
        '%BF' => '¿',
7714
        '%C0' => 'À',
7715
        '%C1' => 'Á',
7716
        '%C2' => 'Â',
7717
        '%C3' => 'Ã',
7718
        '%C4' => 'Ä',
7719
        '%C5' => 'Å',
7720
        '%C6' => 'Æ',
7721
        '%C7' => 'Ç',
7722
        '%C8' => 'È',
7723
        '%C9' => 'É',
7724
        '%CA' => 'Ê',
7725
        '%CB' => 'Ë',
7726
        '%CC' => 'Ì',
7727
        '%CD' => 'Í',
7728
        '%CE' => 'Î',
7729
        '%CF' => 'Ï',
7730
        '%D0' => 'Ð',
7731
        '%D1' => 'Ñ',
7732
        '%D2' => 'Ò',
7733
        '%D3' => 'Ó',
7734
        '%D4' => 'Ô',
7735
        '%D5' => 'Õ',
7736
        '%D6' => 'Ö',
7737
        '%D7' => '×',
7738
        '%D8' => 'Ø',
7739
        '%D9' => 'Ù',
7740
        '%DA' => 'Ú',
7741
        '%DB' => 'Û',
7742
        '%DC' => 'Ü',
7743
        '%DD' => 'Ý',
7744
        '%DE' => 'Þ',
7745
        '%DF' => 'ß',
7746
        '%E0' => 'à',
7747
        '%E1' => 'á',
7748
        '%E2' => 'â',
7749
        '%E3' => 'ã',
7750
        '%E4' => 'ä',
7751
        '%E5' => 'å',
7752
        '%E6' => 'æ',
7753
        '%E7' => 'ç',
7754
        '%E8' => 'è',
7755
        '%E9' => 'é',
7756
        '%EA' => 'ê',
7757
        '%EB' => 'ë',
7758
        '%EC' => 'ì',
7759
        '%ED' => 'í',
7760
        '%EE' => 'î',
7761
        '%EF' => 'ï',
7762
        '%F0' => 'ð',
7763
        '%F1' => 'ñ',
7764
        '%F2' => 'ò',
7765
        '%F3' => 'ó',
7766
        '%F4' => 'ô',
7767
        '%F5' => 'õ',
7768
        '%F6' => 'ö',
7769
        '%F7' => '÷',
7770
        '%F8' => 'ø',
7771
        '%F9' => 'ù',
7772
        '%FA' => 'ú',
7773
        '%FB' => 'û',
7774
        '%FC' => 'ü',
7775
        '%FD' => 'ý',
7776
        '%FE' => 'þ',
7777
        '%FF' => 'ÿ',
7778
    );
7779
  }
7780
7781
  /**
   * Decodes an UTF-8 string to ISO-8859-1.
   *
   * The input is first passed through self::to_utf8(), then every key of
   * self::$UTF8_TO_WIN1252 is replaced by its value. The remaining bytes are
   * folded in place: a two-byte sequence becomes one byte (or "?" when its
   * code point is above 255), three- and four-byte sequences become "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_decode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the replacement table is split into search / replace lists on the first call only
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $REPLACE_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($SEARCH_CACHE, $REPLACE_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, even if "strlen()" is overloaded by mbstring
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    // $readPos walks the source bytes, $writePos is where the next output byte is stored;
    // the output is never longer than the input, so the string is rewritten in place
    $readPos = 0;
    $writePos = 0;

    while ($readPos < $byteCount) {
      switch ($str[$readPos] & "\xF0") {
        // lead byte of a two-byte sequence
        case "\xC0":
        case "\xD0":
          $highBits = ord($str[$readPos] & "\x1F") << 6;
          $lowBits = ord($str[++$readPos] & "\x3F");
          $codePoint = $highBits | $lowBits;

          $str[$writePos] = $codePoint < 256 ? self::chr_and_parse_int($codePoint) : '?';
          break;

        // lead byte of a four-byte sequence: skip one byte more than the three-byte case
        /** @noinspection PhpMissingBreakStatementInspection */
        case "\xF0":
          ++$readPos;
          // no break

        // lead byte of a three-byte sequence
        case "\xE0":
          $str[$writePos] = '?';
          $readPos += 2;
          break;

        // single byte: copy as it is
        default:
          $str[$writePos] = $str[$readPos];
      }

      ++$readPos;
      ++$writePos;
    }

    // cut off the no longer used rest of the buffer
    return (string)self::substr($str, 0, $writePos, '8BIT');
  }
7844
7845
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * The string is encoded via the global "utf8_encode()" function and, if the
   * result contains a "\xC2" byte, every key of self::$CP1252_TO_UTF8 is
   * replaced by its value afterwards.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The encoded string, or an empty string for empty input or a failed encoding.</p>
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);

    // a polyfill of "utf8_encode()" may return false
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // no "\xC2" byte -> the replacement step is skipped
    // NOTE(review): presumably every key of self::$CP1252_TO_UTF8 starts with this byte - confirm
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // the replacement table is split into search / replace lists on the first call only
    static $SEARCH_CACHE = null;
    static $REPLACE_CACHE = null;

    if ($SEARCH_CACHE === null) {
      $SEARCH_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $REPLACE_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($SEARCH_CACHE, $REPLACE_CACHE, $encoded);
  }
7883
7884
  /**
   * Fix utf8-win1252 chars; this is only an alias that forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7897
7898
  /**
   * Returns the table of utf8 whitespace characters (self::$WHITESPACE_TABLE).
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys
   *               as defined in above URL.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7914
7915
  /**
   * Limit the number of words in a string.
   *
   * A "word" is a run of non-whitespace characters. If the string has more
   * than $limit words, it is cut after the last allowed word, right-trimmed
   * and $strAddOn is appended; otherwise the string is returned unchanged.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string <p>An empty string if the input is empty or the limit is lower than 1.</p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $limit = (int)$limit;

    if ($limit < 1) {
      return '';
    }

    // leading whitespace, followed by up to $limit words (each with its trailing whitespace)
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    // only a match that is shorter than the whole string means that something was cut off
    if (
        isset($matches[0])
        &&
        self::strlen($str) !== self::strlen($matches[0])
    ) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7951
7952
  /**
   * Wraps a string to a given number of characters
   *
   * The string is split into characters via self::split() and mirrored into
   * a one-byte-per-character mask (" " for a space, "?" for anything else,
   * "#" for an already existing break). The native "wordwrap()" then works on
   * that mask and the break positions it reports are replayed on the real
   * characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // build the character list and the mask side by side
    $mask = '';
    $chars = array();

    foreach (explode($break, $str) as $lineIndex => $line) {

      // every line but the first one is preceded by an existing break
      if ($lineIndex) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($line) as $char) {
        $chars[] = $char;
        $mask .= ' ' === $char ? ' ' : '?';
      }
    }

    // let the native function decide where the breaks belong
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (true) {
      $breakPos = self::strpos($mask, '#', $breakPos + 1);
      if ($breakPos === false) {
        break;
      }

      // copy all characters in front of the break
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // a space or an old break at the break position is dropped, anything else is kept for the next line
      if ($break === $chars[$charIndex] || ' ' === $chars[$charIndex]) {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
8019
8020
  /**
   * Returns the array of Unicode White Space characters (self::$WHITESPACE).
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
8029
8030
}
8031