Completed
Push — master ( 3859ba...faa8e4 )
by Lars
08:23
created

UTF8::stristr()   C

Complexity

Conditions 16
Paths 101

Size

Total Lines 67
Code Lines 37

Duplication

Lines 7
Ratio 10.45 %

Code Coverage

Tests 20
CRAP Score 45.6047

Importance

Changes 0
Metric Value
dl 7
loc 67
ccs 20
cts 39
cp 0.5128
rs 5.7584
c 0
b 0
f 0
cc 16
eloc 37
nc 101
nop 5
crap 45.6047

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
85
   * Bom => Byte-Length
86
   *
87
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
88
   *
89
   * @var array
90
   */
91
  private static $BOM = array(
92
      "\xef\xbb\xbf"     => 3, // UTF-8 BOM
93
      ''              => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more then one byte ...)
94
      "\x00\x00\xfe\xff" => 4, // UTF-32 (BE) BOM
95
      '  þÿ'             => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
96
      "\xff\xfe\x00\x00" => 4, // UTF-32 (LE) BOM
97
      'ÿþ  '             => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
98
      "\xfe\xff"         => 2, // UTF-16 (BE) BOM
99
      'þÿ'               => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
100
      "\xff\xfe"         => 2, // UTF-16 (LE) BOM
101
      'ÿþ'               => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
102
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Creates an instance and makes sure the environment detection has been executed.
   *
   * INFO: the detection result is kept in the static UTF8::$SUPPORT array.
   */
  public function __construct()
  {
    // fills the static feature map; the call returns early if it was filled before
    UTF8::checkForSupport();
  }
820
821
  /**
   * Fetch a single character by its position, comparable to $str[1] for UTF-8 strings.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>The position of the character to return.</p>
   *
   * @return string <p>Single Multi-Byte character, or an empty string for empty input / negative positions.</p>
   */
  public static function access($str, $pos)
  {
    $haystack = (string)$str;

    // nothing to read from
    if ('' === $haystack) {
      return '';
    }

    $index = (int)$pos;

    // negative positions are not supported here
    if (0 > $index) {
      return '';
    }

    $character = self::substr($haystack, $index, 1);

    return (string)$character;
  }
845
846
  /**
   * Returns the given string with a leading UTF-8 BOM.
   *
   * INFO: A string that already starts with a BOM is returned unchanged.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The output string that contains BOM.</p>
   */
  public static function add_bom_to_string($str)
  {
    // a BOM was detected, so there is nothing to prepend
    if (self::string_has_bom($str) !== false) {
      return $str;
    }

    return self::bom() . $str;
  }
863
864
  /**
   * Convert a binary number (e.g. "1100001") into the bytes it encodes.
   *
   * INFO: The conversion is done via base_convert(), so very long inputs can lose
   *       precision (see the PHP manual of base_convert()).
   *
   * @param mixed $bin <p>String or integer that consists of the digits 1|0.</p>
   *
   * @return string <p>The decoded bytes, or an empty string for empty input.</p>
   */
  public static function binary_to_str($bin)
  {
    // cast first: an integer like 1100001 has no string offsets and would be treated as empty
    $bin = (string)$bin;

    if (!isset($bin[0])) {
      return '';
    }

    $hex = base_convert($bin, 2, 16);

    // pack('H*') works on whole bytes: with an odd number of hex digits the last
    // digit would end up as high nibble ("a" => "\xa0" instead of "\x0a")
    if (strlen($hex) % 2 !== 0) {
      $hex = '0' . $hex;
    }

    return pack('H*', $hex);
  }
879
880
  /**
   * Returns the UTF-8 Byte Order Mark (the three bytes EF BB BF).
   *
   * INFO: take a look at UTF8::$BOM for e.g. UTF-16 and UTF-32 BOM values
   *
   * @return string UTF-8 Byte Order Mark
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
891
892
  /**
   * Alias of UTF8::chr_map(): both arguments are handed over unchanged.
   *
   * @see UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns for the given arguments.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
   * Detects which UTF-8 related PHP features are available and stores the
   * result in the static UTF8::$SUPPORT array.
   *
   * INFO: You don't need to run it manually, it will be triggered if it's needed.
   *       The detection only runs once, later calls return immediately.
   */
  public static function checkForSupport()
  {
    // the detection already ran, keep the stored result
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // the constant is checked first, so the bit test is only evaluated when it exists
    self::$SUPPORT['mbstring_func_overload'] = (
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    $canListTransliterators = (
        self::$SUPPORT['intl'] === true
        &&
        function_exists('transliterator_list_ids') === true
    );
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators ? transliterator_list_ids() : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default "UTF-8" encoding the call is delegated to \IntlChar::chr()
   * when that class is available. Otherwise the bytes are built manually and,
   * for other encodings, converted via mb_convert_encoding().
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } elseif (self::$SUPPORT['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    $i = (int)$code_point;
    if ($i !== $code_point) {
      return null;
    }

    // values outside of the Unicode range (U+0000 - U+10FFFF) cannot be encoded,
    // the bit operations below would only produce invalid byte sequences for them
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    // use static cache, only if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    // the separator keeps the numeric part apart from encoding names that start with digits
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1013
1014
  /**
   * Casts the given value to an integer and passes it to PHP's native chr().
   *
   * @param int $int <p>The byte value.</p>
   *
   * @return string <p>The one-byte string returned by chr().</p>
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return \chr($byteValue);
  }
1023
1024
  /**
   * Runs a callback over every character of a string and collects the results.
   *
   * The string is split via UTF8::split() and the resulting characters are passed to array_map().
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>UTF-8 string to run callback on.</p>
   *
   * @return array <p>The outcome of callback, one entry per character.</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1038
1039
  /**
   * Builds a list with the byte length of every character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>An array of byte lengths of each character, empty for an empty string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // measure each character in bytes ("8BIT"), not in characters
    $byteLength = function ($char) {
      return UTF8::strlen($char, '8BIT');
    };

    return array_map($byteLength, self::split($str));
  }
1066
1067
  /**
   * Get a decimal code representation of a specific character.
   *
   * The lead byte decides how many bytes are read (1 to 4); only that first
   * sequence of the input is decoded, any following bytes are ignored.
   *
   * Missing bytes (empty input or a truncated multi-byte sequence) are read as
   * an empty string instead of an undefined string offset, so no
   * "Uninitialized string offset" notice / warning is raised.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // guard the offset access: "$char[0]" on an empty string is an invalid offset
    $code = self::ord(isset($char[0]) ? $char[0] : '');
    $bytes = 1;

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    }

    for ($i = 2; $i <= $bytes; $i++) {
      // 10xxxxxx
      // a truncated sequence has no byte at this position, fall back to ''
      $byte = isset($char[$i - 1]) ? $char[$i - 1] : '';
      $code = ($code << 6) + (self::ord($byte) & ~0x80);
    }

    return $code;
  }
1106
1107
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>The prefix that is passed on to UTF8::int_to_hex().</p>
   *
   * @return string <p>The code point encoded as U+xxxx, or an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    // the literal entity "&#0;" is handled like an empty character
    $input = ($char === '&#0;') ? '' : $char;
    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
1129
1130
  /**
   * Alias of "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>The input character.</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns for the input.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1143
1144
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character(s).
   *
   * INFO: the chunks are joined via implode(), so $end is placed between the chunks
   *       and is not appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The maximum character length of a chunk.</p>
   * @param string $end      [optional] <p>The character(s) to be inserted between the chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1157
1158
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in this order:
   * 1. keep only byte sequences that match the UTF-8 pattern below, drop stray bytes
   * 2. remove the diamond question mark
   * 3. remove invisible characters
   * 4. optional: normalize whitespace, normalize MS Word chars, remove the BOM
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    // Group 1 captures runs of well-formed sequences; the two other groups
    // match single invalid bytes, which vanish because only "$1" is kept.
    $utf8Pattern = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    $result = preg_replace($utf8Pattern, '$1', $str);
    $result = self::replace_diamond_question_mark($result, '');
    $result = self::remove_invisible_characters($result);

    // the optional steps only run for a strict boolean true
    if (true === $normalize_whitespace) {
      $result = self::normalize_whitespace($result, $keep_non_breaking_space);
    }

    if (true === $normalize_msword) {
      $result = self::normalize_msword($result);
    }

    if (true === $remove_bom) {
      $result = self::remove_bom($result);
    }

    return $result;
  }
1206
1207
  /**
   * Cleans up a string so that only printable UTF-8 chars remain + fixes the UTF-8 encoding.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string, an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fix ISO <-> UTF-8 errors
    $fixed = self::fix_simple_utf8($str);

    // remove all non UTF-8 symbols
    // && remove diamond question mark (�)
    // && remove invisible characters (e.g. "\0")
    // && remove BOM
    // && normalize whitespace chars (but keep non-breaking-spaces)
    $cleaned = self::clean($fixed, true, true, false, true);

    return (string)$cleaned;
  }
1234
1235
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings.</p>
   * @param bool            $u_style    <p>If True, will return code points in U+xxxx format,
   *                                    default, code points will be returned as integers.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    // a plain string is split into its characters first
    if (is_string($arg)) {
      $arg = self::split($arg);
    }

    $codePoints = array_map(array(__CLASS__, 'ord'), $arg);

    if (!$u_style) {
      return $codePoints;
    }

    // convert every integer code point via UTF8::int_to_hex()
    return array_map(array(__CLASS__, 'int_to_hex'), $codePoints);
  }
1272
1273
  /**
   * Counts how often each character is used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split into single characters (chunk length 1)
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1286
1287
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") and decoded
   * via UTF8::html_entity_decode().
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function decimal_to_chr($int)
  {
    $flags = ENT_QUOTES;

    // ENT_HTML5 is only added when Bootup::is_php('5.4') reports true
    if (Bootup::is_php('5.4') === true) {
      $flags |= ENT_HTML5;
    }

    $entity = '&#' . $int . ';';

    return self::html_entity_decode($entity, $flags);
  }
1304
1305
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double
   *        encoding, so you can call this function also on a UTF-8 string and you don't mess up the string.
   *
   * @param string $encoding <p>e.g. 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for UTF-8)<br
   *                         /> otherwise we auto-detect the current string-encoding</p>
   *
   * @return string <p>The converted string, or the (string-casted) input if nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do for an empty string or an empty target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $detected = self::str_detect_encoding($str);

    // detection failed: keep the input as it is
    if ($detected === false) {
      return $str;
    }

    // without "force" an already matching encoding is left alone
    if ($force !== true && $detected === $encoding) {
      return $str;
    }

    $forced = ($force === true);

    if (
        $encoding === 'UTF-8'
        &&
        ($forced || in_array($detected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true))
    ) {
      return self::to_utf8($str);
    }

    if (
        $encoding === 'ISO-8859-1'
        &&
        ($forced || in_array($detected, array('ISO-8859-1', 'UTF-8'), true))
    ) {
      return self::to_iso8859($str);
    }

    // only a warning: the conversion below is attempted anyway
    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $converted = \mb_convert_encoding($str, $encoding, $detected);

    // a falsy conversion result falls back to the input string
    return $converted ? $converted : $str;
  }
1395
1396
  /**
   * Reads entire file into a string, optionally converted into clean UTF-8.
   *
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
   *
   * @link http://php.net/manual/en/function.file-get-contents.php
   *
   * @param string        $filename      <p>
   *                                     Name of the file to read. The value is passed through
   *                                     filter_var() with FILTER_SANITIZE_STRING before it is used.
   *                                     </p>
   * @param int|bool|null $flags         [optional] <p>
   *                                     Forwarded as the second argument of the native file_get_contents(),
   *                                     e.g. FILE_USE_INCLUDE_PATH to search for the file in the include path.
   *                                     Every falsy value is forwarded as false.
   *                                     </p>
   * @param resource|null $context       [optional] <p>
   *                                     A valid context resource created with stream_context_create.
   *                                     If it is null and $timeout is not 0, a context with the "http"
   *                                     option "timeout" is created.
   *                                     </p>
   * @param int|null      $offset        [optional] <p>
   *                                     The offset where the reading starts, null is treated as 0.
   *                                     </p>
   * @param int|null      $maxLength     [optional] <p>
   *                                     Maximum length of data read, only used if it is an integer.
   *                                     The default is to read until end of file is reached.
   *                                     </p>
   * @param int           $timeout       <p>The time in seconds for the timeout.</p>
   *
   * @param boolean       $convertToUtf8 <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
   *                                     or pdf, because they used non default utf-8 chars</p>
   *
   * @return string|false <p>The function returns the read data or false on failure.</p>
   */
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // build a default context with the timeout, if the caller did not provide one
    if ($timeout && $context === null) {
      $httpOptions = array('timeout' => $timeout);
      $context = stream_context_create(array('http' => $httpOptions));
    }

    $useIncludePath = $flags ? $flags : false;
    $start = ($offset === null) ? 0 : $offset;

    if (is_int($maxLength) === true) {
      $data = file_get_contents($filename, $useIncludePath, $context, $start, $maxLength);
    } else {
      $data = file_get_contents($filename, $useIncludePath, $context, $start);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    // the raw data is returned unless the conversion was requested with a strict true
    if ($convertToUtf8 !== true) {
      return $data;
    }

    $data = self::encode('UTF-8', $data, false);

    return self::cleanup($data);
  }
1521
1522
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The whole file is read via file_get_contents() and the content is
   * handed to UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise
   *              (also if the file could not be read).</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    // file_get_contents() returns false on failure: there is nothing to
    // inspect, so do not forward a boolean to the string check
    if ($content === false) {
      return false;
    }

    return self::string_has_bom($content);
  }
1533
1534
  /**
   * Normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Arrays and objects are processed recursively (objects are modified in place),
   * strings are normalized, every other type is returned unchanged.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form that is passed to \Normalizer.</p>
   * @param string $leading_combining  <p>Prepended to a string that starts with a combining mark (\p{Mn}).</p>
   *
   * @return mixed
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (false !== strpos($var, "\r")) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // marker value, so that the isset() check below succeeds
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      // no usable result from the normalizer: re-encode the string instead
      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var);
    }

    if (
        $var[0] >= "\x80"
        &&
        isset($normalized[0], $leading_combining[0])
        &&
        preg_match('/^\p{Mn}/u', $var)
    ) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1597
1598
  /**
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets a specific external variable by name and optionally filters it, the
   * result is passed through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input.php
   *
   * @param int    $type          <p>
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                              <b>INPUT_ENV</b>.
   *                              </p>
   * @param string $variable_name <p>
   *                              Name of a variable to get.
   *                              </p>
   * @param int    $filter        [optional] <p>
   *                              The ID of the filter to apply. The
   *                              manual page lists the available filters.
   *                              </p>
   * @param mixed  $options       [optional] <p>
   *                              Associative array of options or bitwise disjunction of flags. If filter
   *                              accepts options, flags can be provided in "flags" field of array.
   *                              Only forwarded if the method is called with four arguments.
   *                              </p>
   *
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
   * @since 5.2.0
   */
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // forward $options only if the caller really passed it
    $filtered = (func_num_args() < 4)
        ? filter_input($type, $variable_name, $filter)
        : filter_input($type, $variable_name, $filter, $options);

    return self::filter($filtered);
  }
1638
1639
  /**
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
   *
   * Gets external variables and optionally filters them, the result is passed
   * through UTF8::filter().
   *
   * @link  http://php.net/manual/en/function.filter-input-array.php
   *
   * @param int   $type       <p>
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
   *                          <b>INPUT_ENV</b>.
   *                          </p>
   * @param mixed $definition [optional] <p>
   *                          An array defining the arguments. A valid key is a string
   *                          containing a variable name and a valid value is either a filter type, or an array
   *                          optionally specifying the filter, flags and options. If the value is an
   *                          array, valid keys are filter which specifies the filter type,
   *                          flags which specifies any flags that apply to the
   *                          filter, and options which specifies any options that
   *                          apply to the filter.
   *                          </p>
   *                          <p>
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
   *                          input array are filtered by this filter.
   *                          </p>
   * @param bool  $add_empty  [optional] <p>
   *                          Add missing keys as <b>NULL</b> to the return value.
   *                          </p>
   *
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
   * fails.
   * @since 5.2.0
   */
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // with a single argument, the native defaults for definition / add_empty are used
    $filtered = (func_num_args() < 2)
        ? filter_input_array($type)
        : filter_input_array($type, $definition, $add_empty);

    return self::filter($filtered);
  }
1686
1687
  /**
1688
   * "filter_var()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1689
   *
1690
   * Filters a variable with a specified filter
1691
   *
1692
   * @link  http://php.net/manual/en/function.filter-var.php
1693
   *
1694
   * @param mixed $variable <p>
1695
   *                        Value to filter.
1696
   *                        </p>
1697
   * @param int   $filter   [optional] <p>
1698
   *                        The ID of the filter to apply. The
1699
   *                        manual page lists the available filters.
1700
   *                        </p>
1701
   * @param mixed $options  [optional] <p>
1702
   *                        Associative array of options or bitwise disjunction of flags. If filter
1703
   *                        accepts options, flags can be provided in "flags" field of array. For
1704
   *                        the "callback" filter, callable type should be passed. The
1705
   *                        callback must accept one argument, the value to be filtered, and return
1706
   *                        the value after filtering/sanitizing it.
1707
   *                        </p>
1708
   *                        <p>
1709
   *                        <code>
1710
   *                        // for filters that accept options, use this format
1711
   *                        $options = array(
1712
   *                        'options' => array(
1713
   *                        'default' => 3, // value to return if the filter fails
1714
   *                        // other options here
1715
   *                        'min_range' => 0
1716
   *                        ),
1717
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1718
   *                        );
1719
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1720
   *                        // for filter that only accept flags, you can pass them directly
1721
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1722
   *                        // for filter that only accept flags, you can also pass as an array
1723
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1724
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1725
   *                        // callback validate filter
1726
   *                        function foo($value)
1727
   *                        {
1728
   *                        // Expected format: Surname, GivenNames
1729
   *                        if (strpos($value, ", ") === false) return false;
1730
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1731
   *                        $empty = (empty($surname) || empty($givennames));
1732
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1733
   *                        if ($empty || $notstrings) {
1734
   *                        return false;
1735
   *                        } else {
1736
   *                        return $value;
1737
   *                        }
1738
   *                        }
1739
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1740
   *                        </code>
1741
   *                        </p>
1742
   *
1743
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1744
   * @since 5.2.0
1745
   */
1746 1 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Only forward $options when the caller really passed a third argument,
    // so the native function keeps using its own default otherwise.
    $optionsWereGiven = func_num_args() >= 3;

    if ($optionsWereGiven) {
      $filtered = filter_var($variable, $filter, $options);
    } else {
      $filtered = filter_var($variable, $filter);
    }

    // Post-process the native result with the class' own filter().
    return self::filter($filtered);
  }
1756
1757
  /**
1758
   * "filter_var_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1759
   *
1760
   * Gets multiple variables and optionally filters them
1761
   *
1762
   * @link  http://php.net/manual/en/function.filter-var-array.php
1763
   *
1764
   * @param array $data       <p>
1765
   *                          An array with string keys containing the data to filter.
1766
   *                          </p>
1767
   * @param mixed $definition [optional] <p>
1768
   *                          An array defining the arguments. A valid key is a string
1769
   *                          containing a variable name and a valid value is either a
1770
   *                          filter type, or an
1771
   *                          array optionally specifying the filter, flags and options.
1772
   *                          If the value is an array, valid keys are filter
1773
   *                          which specifies the filter type,
1774
   *                          flags which specifies any flags that apply to the
1775
   *                          filter, and options which specifies any options that
1776
   *                          apply to the filter. See the example below for a better understanding.
1777
   *                          </p>
1778
   *                          <p>
1779
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1780
   *                          input array are filtered by this filter.
1781
   *                          </p>
1782
   * @param bool  $add_empty  [optional] <p>
1783
   *                          Add missing keys as <b>NULL</b> to the return value.
1784
   *                          </p>
1785
   *
1786
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1787
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1788
   * the variable is not set.
1789
   * @since 5.2.0
1790
   */
1791 1 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native defaults for $definition / $add_empty
    // apply; otherwise both values are forwarded as given.
    if (func_num_args() >= 2) {
      $filtered = filter_var_array($data, $definition, $add_empty);
    } else {
      $filtered = filter_var_array($data);
    }

    // Post-process the native result with the class' own filter().
    return self::filter($filtered);
  }
1801
1802
  /**
1803
   * Check if the number of unicode characters is not more than the specified integer.
1804
   *
1805
   * @param string $str      The original string to be checked.
1806
   * @param int    $box_size The size in number of chars to be checked against string.
1807
   *
1808
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1809
   */
1810 1
  public static function fits_inside($str, $box_size)
  {
    // length as reported by the class' own strlen()
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1814
1815
  /**
1816
   * Try to fix simple broken UTF-8 strings.
1817
   *
1818
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1819
   *
1820
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1821
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1822
   * See: http://en.wikipedia.org/wiki/Windows-1252
1823
   *
1824
   * @param string $str <p>The input string</p>
1825
   *
1826
   * @return string
1827
   */
1828 26 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $input = (string)$str;

    // nothing to repair in an empty string
    if ($input === '') {
      return '';
    }

    // The search/replace lists are derived from self::$BROKEN_UTF8_FIX once
    // and then re-used by every later call.
    static $brokenSequences = null;
    static $fixedSequences = null;

    if ($brokenSequences === null) {
      $brokenSequences = array_keys(self::$BROKEN_UTF8_FIX);
      $fixedSequences = array_values(self::$BROKEN_UTF8_FIX);
    }

    return str_replace($brokenSequences, $fixedSequences, $input);
  }
1847
1848
  /**
1849
   * Fix a double (or multiple) encoded UTF8 string.
1850
   *
1851
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1852
   *
1853
   * @return string|string[] <p>Will return the fixed input-"array" or
1854
   *                         the fixed input-"string".</p>
1855
   */
1856 1
  public static function fix_utf8($str)
  {
    if (is_array($str) === false) {
      // Decode and re-encode until a pass no longer changes the value.
      $previous = '';
      while ($str !== $previous) {
        $previous = $str;
        $str = self::to_utf8(self::utf8_decode($previous));
      }

      return $str;
    }

    // Array input: fix every element recursively, keeping the original keys.
    $fixed = $str;
    foreach ($str as $key => $value) {
      $fixed[$key] = self::fix_utf8($value);
    }

    return $fixed;
  }
1880
1881
  /**
1882
   * Get the text direction of a specific character.
1883
   *
1884
   * @param string $char
1885
   *
1886
   * @return string <p>'RTL' or 'LTR'</p>
1887
   */
1888 1
  public static function getCharDirection($char)
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction classes as numbered by the "IntlChar"-Class
      if (in_array($intlDirection, array(0, 11, 12, 20), true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, array(1, 13, 14, 15, 21), true)) {
        return 'RTL';
      }

      // any other direction class falls through to the code point tables
    }

    $codePoint = self::chr_to_decimal($char);

    // every entry of the tables below lies within 0x5be .. 0x10b7f
    $insideRtlSpan = (0x5be <= $codePoint && 0x10b7f >= $codePoint);
    if (!$insideRtlSpan) {
      return 'LTR';
    }

    // single code points that are treated as right-to-left (strict match)
    static $rtlCodePoints = array(
        0x5be, 0x5c0, 0x5c3, 0x5c6,
        0x608, 0x60b, 0x60d, 0x61b,
        0x710, 0x7b1, 0x7fa,
        0x81a, 0x824, 0x828, 0x85e,
        0x200f,
        0xfb1d, 0xfb3e,
        0x10808, 0x1083c, 0x1093f, 0x10a00,
    );

    if (in_array($codePoint, $rtlCodePoints, true)) {
      return 'RTL';
    }

    // inclusive [first, last] code point ranges that are treated as right-to-left
    static $rtlRanges = array(
        array(0x5d0, 0x5ea),
        array(0x5f0, 0x5f4),
        array(0x61e, 0x64a),
        array(0x66d, 0x66f),
        array(0x671, 0x6d5),
        array(0x6e5, 0x6e6),
        array(0x6ee, 0x6ef),
        array(0x6fa, 0x70d),
        array(0x712, 0x72f),
        array(0x74d, 0x7a5),
        array(0x7c0, 0x7ea),
        array(0x7f4, 0x7f5),
        array(0x800, 0x815),
        array(0x830, 0x83e),
        array(0x840, 0x858),
        array(0xfb1f, 0xfb28),
        array(0xfb2a, 0xfb36),
        array(0xfb38, 0xfb3c),
        array(0xfb40, 0xfb41),
        array(0xfb43, 0xfb44),
        array(0xfb46, 0xfbc1),
        array(0xfbd3, 0xfd3d),
        array(0xfd50, 0xfd8f),
        array(0xfd92, 0xfdc7),
        array(0xfdf0, 0xfdfc),
        array(0xfe70, 0xfe74),
        array(0xfe76, 0xfefc),
        array(0x10800, 0x10805),
        array(0x1080a, 0x10835),
        array(0x10837, 0x10838),
        array(0x1083f, 0x10855),
        array(0x10857, 0x1085f),
        array(0x10900, 0x1091b),
        array(0x10920, 0x10939),
        array(0x10a10, 0x10a13),
        array(0x10a15, 0x10a17),
        array(0x10a19, 0x10a33),
        array(0x10a40, 0x10a47),
        array(0x10a50, 0x10a58),
        array(0x10a60, 0x10a7f),
        array(0x10b00, 0x10b35),
        array(0x10b40, 0x10b55),
        array(0x10b58, 0x10b72),
        array(0x10b78, 0x10b7f),
    );

    foreach ($rtlRanges as $range) {
      if ($range[0] <= $codePoint && $range[1] >= $codePoint) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2002
2003
  /**
2004
   * get data from "/data/*.php"
2005
   *
2006
   * @param string $file
2007
   *
2008
   * @return bool|string|array|int <p>Will return false on error.</p>
2009
   */
2010 4
  private static function getData($file)
  {
    // data files live next to this class in "data/<name>.php"
    $path = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($path)) {
      return false;
    }

    // the return value of the included file is handed back to the caller
    /** @noinspection PhpIncludeInspection */
    return require $path;
  }
2020
2021
  /**
2022
   * Check for php-support.
2023
   *
2024
   * @param string|null $key
2025
   *
2026
   * @return mixed <p>Return the full support-"array", if $key === null<br />
2027
   *               return bool-value, if $key is used and available<br />
2028
   *               otherwise return null</p>
2029
   */
2030 7
  public static function getSupportInfo($key = null)
  {
    // make sure the support information has been collected
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key requested: hand out the complete support array
    if ($key === null) {
      return self::$SUPPORT;
    }

    // a single entry, or null if the key is unknown
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2046
2047
  /**
2048
   * alias for "UTF8::string_has_bom()"
2049
   *
2050
   * @see UTF8::string_has_bom()
2051
   *
2052
   * @param string $str
2053
   *
2054
   * @return bool
2055
   *
2056
   * @deprecated
2057
   */
2058
  public static function hasBom($str)
  {
    // deprecated alias: delegate to string_has_bom()
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2062
2063
  /**
2064
   * Converts a hexadecimal-value into an UTF-8 character.
2065
   *
2066
   * @param string $hexdec <p>The hexadecimal value.</p>
2067
   *
2068
   * @return string|false <p>One single UTF-8 character.</p>
2069
   */
2070 2
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> decimal number -> character
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2074
2075
  /**
2076
   * Converts hexadecimal U+xxxx code point representation to integer.
2077
   *
2078
   * INFO: opposite to UTF8::int_to_hex()
2079
   *
2080
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2081
   *
2082
   * @return int|false <p>The code point, or false on failure.</p>
2083
   */
2084 1
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    // an empty string is not a code point representation
    if (!isset($hexDec[0])) {
      return false;
    }

    // Accepts an optional "\u" or "U+" prefix followed by 4 to 6 hexadecimal
    // digits (case-insensitive). Only real hex digits are allowed: with the
    // former "[a-z0-9]" class an input like "zzzz" matched and was silently
    // turned into 0 by intval() instead of being reported as a failure.
    if (preg_match('/^(?:\\\u|U\+|)([0-9a-f]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2098
2099
  /**
2100
   * alias for "UTF8::html_entity_decode()"
2101
   *
2102
   * @see UTF8::html_entity_decode()
2103
   *
2104
   * @param string $str
2105
   * @param int    $flags
2106
   * @param string $encoding
2107
   *
2108
   * @return string
2109
   */
2110 1
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // alias: all arguments are handed to html_entity_decode() unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2114
2115
  /**
2116
   * Converts a UTF-8 string to a series of HTML numbered entities.
2117
   *
2118
   * INFO: opposite to UTF8::html_decode()
2119
   *
2120
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2121
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2122
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2123
   *
2124
   * @return string <p>HTML numbered entities.</p>
2125
   */
2126 2
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // first code point that gets encoded: skip the ASCII range on request
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // A conversion map consists of groups of exactly four integers
      // (start, end, offset, mask). The former fifth element was ignored by
      // older PHP versions but is rejected with a ValueError by PHP 8.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // fallback without mbstring: encode the string character by character
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return UTF8::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2164
2165
  /**
2166
   * UTF-8 version of html_entity_decode()
2167
   *
2168
   * The reason we are not using html_entity_decode() by itself is because
2169
   * while it is not technically correct to leave out the semicolon
2170
   * at the end of an entity most browsers will still interpret the entity
2171
   * correctly. html_entity_decode() does not convert entities without
2172
   * semicolons, so we are left with our own little solution here. Bummer.
2173
   *
2174
   * Convert all HTML entities to their applicable characters
2175
   *
2176
   * INFO: opposite to UTF8::html_encode()
2177
   *
2178
   * @link http://php.net/manual/en/function.html-entity-decode.php
2179
   *
2180
   * @param string $str      <p>
2181
   *                         The input string.
2182
   *                         </p>
2183
   * @param int    $flags    [optional] <p>
2184
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2185
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2186
   *                         <table>
2187
   *                         Available <i>flags</i> constants
2188
   *                         <tr valign="top">
2189
   *                         <td>Constant Name</td>
2190
   *                         <td>Description</td>
2191
   *                         </tr>
2192
   *                         <tr valign="top">
2193
   *                         <td><b>ENT_COMPAT</b></td>
2194
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2195
   *                         </tr>
2196
   *                         <tr valign="top">
2197
   *                         <td><b>ENT_QUOTES</b></td>
2198
   *                         <td>Will convert both double and single quotes.</td>
2199
   *                         </tr>
2200
   *                         <tr valign="top">
2201
   *                         <td><b>ENT_NOQUOTES</b></td>
2202
   *                         <td>Will leave both double and single quotes unconverted.</td>
2203
   *                         </tr>
2204
   *                         <tr valign="top">
2205
   *                         <td><b>ENT_HTML401</b></td>
2206
   *                         <td>
2207
   *                         Handle code as HTML 4.01.
2208
   *                         </td>
2209
   *                         </tr>
2210
   *                         <tr valign="top">
2211
   *                         <td><b>ENT_XML1</b></td>
2212
   *                         <td>
2213
   *                         Handle code as XML 1.
2214
   *                         </td>
2215
   *                         </tr>
2216
   *                         <tr valign="top">
2217
   *                         <td><b>ENT_XHTML</b></td>
2218
   *                         <td>
2219
   *                         Handle code as XHTML.
2220
   *                         </td>
2221
   *                         </tr>
2222
   *                         <tr valign="top">
2223
   *                         <td><b>ENT_HTML5</b></td>
2224
   *                         <td>
2225
   *                         Handle code as HTML 5.
2226
   *                         </td>
2227
   *                         </tr>
2228
   *                         </table>
2229
   *                         </p>
2230
   * @param string $encoding [optional] <p>Encoding to use.</p>
2231
   *
2232
   * @return string <p>The decoded string.</p>
2233
   */
2234 16
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fewer than four bytes are returned untouched, examples: &; || &x;
    if (!isset($str[3])) {
      return $str;
    }

    // without an ampersand there is nothing to decode
    if (strpos($str, '&') === false) {
      return $str;
    }

    // neither a numeric entity prefix nor any semicolon: nothing to decode
    if (strpos($str, '&#') === false && strpos($str, ';') === false) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      // ENT_HTML5 is only used when Bootup reports PHP >= 5.4
      $flags = (Bootup::is_php('5.4') === true) ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
    }

    // Decodes one decimal numeric entity, but leaves it as it is when the
    // result would be a single or double quote.
    $decodeNumericEntity = function ($matches) use ($encoding) {
      $decoded = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

      if ($decoded === '"' || $decoded === "'") {
        return $matches[0];
      }

      return $decoded;
    };

    // repeat until a pass no longer changes the string (nested entities)
    do {
      $before = $str;

      $str = preg_replace_callback("/&#\d{2,6};/", $decodeNumericEntity, $str);

      // decode numeric & UTF16 two byte entities:
      // append the missing semicolon to numeric entities first
      $withSemicolons = preg_replace(
          '/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS',
          '$1;',
          $str
      );

      $str = html_entity_decode($withSemicolons, $flags, $encoding);

    } while ($before !== $str);

    return $str;
  }
2299
2300
  /**
2301
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2302
   *
2303
   * @link http://php.net/manual/en/function.htmlentities.php
2304
   *
2305
   * @param string $str           <p>
2306
   *                              The input string.
2307
   *                              </p>
2308
   * @param int    $flags         [optional] <p>
2309
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2310
   *                              invalid code unit sequences and the used document type. The default is
2311
   *                              ENT_COMPAT | ENT_HTML401.
2312
   *                              <table>
2313
   *                              Available <i>flags</i> constants
2314
   *                              <tr valign="top">
2315
   *                              <td>Constant Name</td>
2316
   *                              <td>Description</td>
2317
   *                              </tr>
2318
   *                              <tr valign="top">
2319
   *                              <td><b>ENT_COMPAT</b></td>
2320
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2321
   *                              </tr>
2322
   *                              <tr valign="top">
2323
   *                              <td><b>ENT_QUOTES</b></td>
2324
   *                              <td>Will convert both double and single quotes.</td>
2325
   *                              </tr>
2326
   *                              <tr valign="top">
2327
   *                              <td><b>ENT_NOQUOTES</b></td>
2328
   *                              <td>Will leave both double and single quotes unconverted.</td>
2329
   *                              </tr>
2330
   *                              <tr valign="top">
2331
   *                              <td><b>ENT_IGNORE</b></td>
2332
   *                              <td>
2333
   *                              Silently discard invalid code unit sequences instead of returning
2334
   *                              an empty string. Using this flag is discouraged as it
2335
   *                              may have security implications.
2336
   *                              </td>
2337
   *                              </tr>
2338
   *                              <tr valign="top">
2339
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2340
   *                              <td>
2341
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2342
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2343
   *                              </td>
2344
   *                              </tr>
2345
   *                              <tr valign="top">
2346
   *                              <td><b>ENT_DISALLOWED</b></td>
2347
   *                              <td>
2348
   *                              Replace invalid code points for the given document type with a
2349
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2350
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2351
   *                              instance, to ensure the well-formedness of XML documents with
2352
   *                              embedded external content.
2353
   *                              </td>
2354
   *                              </tr>
2355
   *                              <tr valign="top">
2356
   *                              <td><b>ENT_HTML401</b></td>
2357
   *                              <td>
2358
   *                              Handle code as HTML 4.01.
2359
   *                              </td>
2360
   *                              </tr>
2361
   *                              <tr valign="top">
2362
   *                              <td><b>ENT_XML1</b></td>
2363
   *                              <td>
2364
   *                              Handle code as XML 1.
2365
   *                              </td>
2366
   *                              </tr>
2367
   *                              <tr valign="top">
2368
   *                              <td><b>ENT_XHTML</b></td>
2369
   *                              <td>
2370
   *                              Handle code as XHTML.
2371
   *                              </td>
2372
   *                              </tr>
2373
   *                              <tr valign="top">
2374
   *                              <td><b>ENT_HTML5</b></td>
2375
   *                              <td>
2376
   *                              Handle code as HTML 5.
2377
   *                              </td>
2378
   *                              </tr>
2379
   *                              </table>
2380
   *                              </p>
2381
   * @param string $encoding      [optional] <p>
2382
   *                              Like <b>htmlspecialchars</b>,
2383
   *                              <b>htmlentities</b> takes an optional third argument
2384
   *                              <i>encoding</i> which defines encoding used in
2385
   *                              conversion.
2386
   *                              Although this argument is technically optional, you are highly
2387
   *                              encouraged to specify the correct value for your code.
2388
   *                              </p>
2389
   * @param bool   $double_encode [optional] <p>
2390
   *                              When <i>double_encode</i> is turned off PHP will not
2391
   *                              encode existing html entities. The default is to convert everything.
2392
   *                              </p>
2393
   *
2394
   *
2395
   * @return string the encoded string.
2396
   * </p>
2397
   * <p>
2398
   * If the input <i>string</i> contains an invalid code unit
2399
   * sequence within the given <i>encoding</i> an empty string
2400
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2401
   * <b>ENT_SUBSTITUTE</b> flags are set.
2402
   */
2403 2
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP does not convert a backslash into its html entity, because the
     * backslash is mostly used to escape characters before inserting them
     * into a database. That is not needed here, so every backslash is
     * replaced by its html entity equivalent.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace('\\', '&#92;', $encoded);

    // the extra numeric-entity pass below is only done for UTF-8
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect every remaining character of three or more bytes once and map
    // it to its numbered html entity.
    $needles = array();
    $entities = array();
    foreach (self::chr_size_list($encoded) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($encoded, $position);
      if (isset($entities[$char])) {
        continue;
      }

      $needles[$char] = $char;
      $entities[$char] = self::html_encode($char);
    }

    return str_replace($needles, $entities, $encoded);
  }
2441
2442
  /**
2443
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2444
   *
2445
   * INFO: Take a look at "UTF8::htmlentities()"
2446
   *
2447
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2448
   *
2449
   * @param string $str           <p>
2450
   *                              The string being converted.
2451
   *                              </p>
2452
   * @param int    $flags         [optional] <p>
2453
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2454
   *                              invalid code unit sequences and the used document type. The default is
2455
   *                              ENT_COMPAT | ENT_HTML401.
2456
   *                              <table>
2457
   *                              Available <i>flags</i> constants
2458
   *                              <tr valign="top">
2459
   *                              <td>Constant Name</td>
2460
   *                              <td>Description</td>
2461
   *                              </tr>
2462
   *                              <tr valign="top">
2463
   *                              <td><b>ENT_COMPAT</b></td>
2464
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2465
   *                              </tr>
2466
   *                              <tr valign="top">
2467
   *                              <td><b>ENT_QUOTES</b></td>
2468
   *                              <td>Will convert both double and single quotes.</td>
2469
   *                              </tr>
2470
   *                              <tr valign="top">
2471
   *                              <td><b>ENT_NOQUOTES</b></td>
2472
   *                              <td>Will leave both double and single quotes unconverted.</td>
2473
   *                              </tr>
2474
   *                              <tr valign="top">
2475
   *                              <td><b>ENT_IGNORE</b></td>
2476
   *                              <td>
2477
   *                              Silently discard invalid code unit sequences instead of returning
2478
   *                              an empty string. Using this flag is discouraged as it
2479
   *                              may have security implications.
2480
   *                              </td>
2481
   *                              </tr>
2482
   *                              <tr valign="top">
2483
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2484
   *                              <td>
2485
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2486
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2487
   *                              </td>
2488
   *                              </tr>
2489
   *                              <tr valign="top">
2490
   *                              <td><b>ENT_DISALLOWED</b></td>
2491
   *                              <td>
2492
   *                              Replace invalid code points for the given document type with a
2493
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2494
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2495
   *                              instance, to ensure the well-formedness of XML documents with
2496
   *                              embedded external content.
2497
   *                              </td>
2498
   *                              </tr>
2499
   *                              <tr valign="top">
2500
   *                              <td><b>ENT_HTML401</b></td>
2501
   *                              <td>
2502
   *                              Handle code as HTML 4.01.
2503
   *                              </td>
2504
   *                              </tr>
2505
   *                              <tr valign="top">
2506
   *                              <td><b>ENT_XML1</b></td>
2507
   *                              <td>
2508
   *                              Handle code as XML 1.
2509
   *                              </td>
2510
   *                              </tr>
2511
   *                              <tr valign="top">
2512
   *                              <td><b>ENT_XHTML</b></td>
2513
   *                              <td>
2514
   *                              Handle code as XHTML.
2515
   *                              </td>
2516
   *                              </tr>
2517
   *                              <tr valign="top">
2518
   *                              <td><b>ENT_HTML5</b></td>
2519
   *                              <td>
2520
   *                              Handle code as HTML 5.
2521
   *                              </td>
2522
   *                              </tr>
2523
   *                              </table>
2524
   *                              </p>
2525
   * @param string $encoding      [optional] <p>
2526
   *                              Defines encoding used in conversion.
2527
   *                              </p>
2528
   *                              <p>
2529
   *                              For the purposes of this function, the encodings
2530
   *                              ISO-8859-1, ISO-8859-15,
2531
   *                              UTF-8, cp866,
2532
   *                              cp1251, cp1252, and
2533
   *                              KOI8-R are effectively equivalent, provided the
2534
   *                              <i>string</i> itself is valid for the encoding, as
2535
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2536
   *                              the same positions in all of these encodings.
2537
   *                              </p>
2538
   * @param bool   $double_encode [optional] <p>
2539
   *                              When <i>double_encode</i> is turned off PHP will not
2540
   *                              encode existing html entities, the default is to convert everything.
2541
   *                              </p>
2542
   *
2543
   * @return string The converted string.
2544
   * </p>
2545
   * <p>
2546
   * If the input <i>string</i> contains an invalid code unit
2547
   * sequence within the given <i>encoding</i> an empty string
2548
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2549
   * <b>ENT_SUBSTITUTE</b> flags are set.
2550
   */
2551 1
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // "UTF-8" is used as it is, every other charset name goes through "normalize_encoding()" first
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    // the real work is delegated to the native function
    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2559
2560
  /**
2561
   * Checks whether iconv is available on the server.
2562
   *
2563
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2564
   */
2565 1
  public static function iconv_loaded()
  {
    // "extension_loaded()" already returns a boolean
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class
    //
    // The "iconv_set_encoding()" calls are only made when the extension is really there,
    // otherwise they would end in a fatal "undefined function" error.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2580
2581
  /**
2582
   * alias for "UTF8::decimal_to_chr()"
2583
   *
2584
   * @see UTF8::decimal_to_chr()
2585
   *
2586
   * @param mixed $int
2587
   *
2588
   * @return string
2589
   */
2590 2
  public static function int_to_chr($int)
  {
    // pure alias: the conversion is done by "UTF8::decimal_to_chr()"
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2594
2595
  /**
2596
   * Converts Integer to hexadecimal U+xxxx code point representation.
2597
   *
2598
   * INFO: opposite to UTF8::hex_to_int()
2599
   *
2600
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
2601
   * @param string $pfix [optional]
2602
   *
2603
   * @return string <p>The code point, or empty string on failure.</p>
2604
   */
2605 3
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // everything that is not a real integer is rejected with an empty string
    if ((int)$int !== $int) {
      return '';
    }

    // left-pad with zeros to at least four hex digits, longer values stay untouched
    return $pfix . str_pad(dechex($int), 4, '0', STR_PAD_LEFT);
  }
2617
2618
  /**
2619
   * Checks whether intl-char is available on the server.
2620
   *
2621
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2622
   */
2623 1
  public static function intlChar_loaded()
  {
    // the class is only looked up when "Bootup::is_php('7.0')" reports true
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2631
2632
  /**
2633
   * Checks whether intl is available on the server.
2634
   *
2635
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
2636
   */
2637 4
  public static function intl_loaded()
  {
    // "extension_loaded()" already hands back a boolean
    $loaded = extension_loaded('intl');

    return $loaded;
  }
2641
2642
  /**
2643
   * alias for "UTF8::is_ascii()"
2644
   *
2645
   * @see UTF8::is_ascii()
2646
   *
2647
   * @param string $str
2648
   *
2649
   * @return boolean
2650
   *
2651
   * @deprecated
2652
   */
2653
  public static function isAscii($str)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_ascii()"
    $result = self::is_ascii($str);

    return $result;
  }
2657
2658
  /**
2659
   * alias for "UTF8::is_base64()"
2660
   *
2661
   * @see UTF8::is_base64()
2662
   *
2663
   * @param string $str
2664
   *
2665
   * @return bool
2666
   *
2667
   * @deprecated
2668
   */
2669
  public static function isBase64($str)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_base64()"
    $result = self::is_base64($str);

    return $result;
  }
2673
2674
  /**
2675
   * alias for "UTF8::is_binary()"
2676
   *
2677
   * @see UTF8::is_binary()
2678
   *
2679
   * @param string $str
2680
   *
2681
   * @return bool
2682
   *
2683
   * @deprecated
2684
   */
2685
  public static function isBinary($str)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_binary()"
    $result = self::is_binary($str);

    return $result;
  }
2689
2690
  /**
2691
   * alias for "UTF8::is_bom()"
2692
   *
2693
   * @see UTF8::is_bom()
2694
   *
2695
   * @param string $utf8_chr
2696
   *
2697
   * @return boolean
2698
   *
2699
   * @deprecated
2700
   */
2701
  public static function isBom($utf8_chr)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_bom()"
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2705
2706
  /**
2707
   * alias for "UTF8::is_html()"
2708
   *
2709
   * @see UTF8::is_html()
2710
   *
2711
   * @param string $str
2712
   *
2713
   * @return boolean
2714
   *
2715
   * @deprecated
2716
   */
2717
  public static function isHtml($str)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_html()"
    $result = self::is_html($str);

    return $result;
  }
2721
2722
  /**
2723
   * alias for "UTF8::is_json()"
2724
   *
2725
   * @see UTF8::is_json()
2726
   *
2727
   * @param string $str
2728
   *
2729
   * @return bool
2730
   *
2731
   * @deprecated
2732
   */
2733
  public static function isJson($str)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_json()"
    $result = self::is_json($str);

    return $result;
  }
2737
2738
  /**
2739
   * alias for "UTF8::is_utf16()"
2740
   *
2741
   * @see UTF8::is_utf16()
2742
   *
2743
   * @param string $str
2744
   *
2745
   * @return int|false false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.
2746
   *
2747
   * @deprecated
2748
   */
2749
  public static function isUtf16($str)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_utf16()"
    $result = self::is_utf16($str);

    return $result;
  }
2753
2754
  /**
2755
   * alias for "UTF8::is_utf32()"
2756
   *
2757
   * @see UTF8::is_utf32()
2758
   *
2759
   * @param string $str
2760
   *
2761
   * @return int|false false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.
2762
   *
2763
   * @deprecated
2764
   */
2765
  public static function isUtf32($str)
  {
    // deprecated camelCase alias, the check itself lives in "UTF8::is_utf32()"
    $result = self::is_utf32($str);

    return $result;
  }
2769
2770
  /**
2771
   * alias for "UTF8::is_utf8()"
2772
   *
2773
   * @see UTF8::is_utf8()
2774
   *
2775
   * @param string $str
2776
   * @param bool   $strict
2777
   *
2778
   * @return bool
2779
   *
2780
   * @deprecated
2781
   */
2782
  public static function isUtf8($str, $strict = false)
  {
    // deprecated camelCase alias, both arguments are passed through to "UTF8::is_utf8()"
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2786
2787
  /**
2788
   * Checks if a string is 7 bit ASCII.
2789
   *
2790
   * @param string $str <p>The string to check.</p>
2791
   *
2792
   * @return bool <p>
2793
   *              <strong>true</strong> if it is ASCII<br />
2794
   *              <strong>false</strong> otherwise
2795
   *              </p>
2796
   */
2797 53
  public static function is_ascii($str)
  {
    $str = (string)$str;

    // an empty string counts as ASCII
    if ($str === '') {
      return true;
    }

    // The string passes as long as no byte outside of the allowed set is found.
    // NOTE(review): "\x10" and "\x13" look like decimal 10 / 13 written as hex
    // ("\x0A" and "\x0D" are listed as well) -- confirm whether that is intended.
    return preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str) !== 1;
  }
2807
2808
  /**
2809
   * Returns true if the string is base64 encoded, false otherwise.
2810
   *
2811
   * @param string $str <p>The input string.</p>
2812
   *
2813
   * @return bool <p>Whether or not $str is base64 encoded.</p>
2814
   */
2815 1
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // strict mode: "base64_decode()" returns false if the input contains
    // characters from outside of the base64 alphabet
    $decoded = base64_decode($str, true);

    // Compare explicitly against false / '' instead of relying on truthiness:
    // a decoded payload of "0" (e.g. from "MA==") is falsy in PHP but still valid.
    if ($decoded === false || $decoded === '') {
      return false;
    }

    // only the canonical encoding of the decoded payload is accepted
    return base64_encode($decoded) === $str;
  }
2830
2831
  /**
2832
   * Check if the input is binary... (this looks like a hack).
2833
   *
2834
   * @param mixed $input
2835
   *
2836
   * @return bool
2837
   */
2838 16
  public static function is_binary($input)
  {
    $input = (string)$input;

    // an empty input is never treated as binary
    if ($input === '') {
      return false;
    }

    // a string that consists only of "0" and "1" is treated as a binary number
    if (preg_match('~^[01]+$~', $input)) {
      return true;
    }

    // Any NUL byte marks the input as binary. A separate "more than 30% NUL bytes"
    // test is not needed: whenever it would match, this check matches as well.
    return strpos($input, "\x00") !== false;
  }
2861
2862
  /**
2863
   * Check if the file is binary.
2864
   *
2865
   * @param string $file
2866
   *
2867
   * @return boolean
2868
   */
2869
  public static function is_binary_file($file)
  {
    $fp = false;
    $block = '';

    try {
      $fp = fopen($file, 'rb');

      // "fopen()" signals a failure via false, so only a real handle is read;
      // the first 512 bytes are the sample that gets inspected
      if ($fp !== false) {
        $block = fread($fp, 512);
      }
    } catch (\Exception $e) {
      // e.g. an error handler that converts warnings into exceptions
      $block = '';
    }

    // close the handle on every path, also if reading failed
    if (is_resource($fp) === true) {
      fclose($fp);
    }

    // an unreadable file ends up as an empty sample here
    return self::is_binary($block);
  }
2881
2882
  /**
2883
   * Checks if the given string is equal to any "Byte Order Mark".
2884
   *
2885
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
2886
   *
2887
   * @param string $str <p>The input string.</p>
2888
   *
2889
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
2890
   */
2891 1
  public static function is_bom($str)
  {
    // the candidates are the keys of "self::$BOM" (the values are not needed here),
    // and the comparison is strict, exactly one of them must be identical to "$str"
    return in_array($str, array_keys(self::$BOM), true);
  }
2901
2902
  /**
2903
   * Check if the string contains any html-tags <lall>.
2904
   *
2905
   * @param string $str <p>The input string.</p>
2906
   *
2907
   * @return boolean
2908
   */
2909 1
  public static function is_html($str)
  {
    $str = (string)$str;

    // nothing to find in an empty string
    if ($str === '') {
      return false;
    }

    // one opening, closing or self-closing tag (with optional attributes) is enough
    $found = preg_match("/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/", $str);

    return $found === 1;
  }
2928
2929
  /**
2930
   * Try to check if "$str" is a JSON string.
2931
   *
2932
   * @param string $str <p>The input string.</p>
2933
   *
2934
   * @return bool
2935
   */
2936 1
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    // only objects and arrays are accepted, decoded scalars and null are not
    if (is_object($decoded) !== true && is_array($decoded) !== true) {
      return false;
    }

    // ... and the decoder must not have reported an error
    return json_last_error() === JSON_ERROR_NONE;
  }
2960
2961
  /**
2962
   * Check if the string is UTF-16.
2963
   *
2964
   * @param string $str <p>The input string.</p>
2965
   *
2966
   * @return int|false <p>
2967
   *                   <strong>false</strong> if it's not UTF-16,<br />
2968
   *                   <strong>1</strong> for UTF-16LE,<br />
2969
   *                   <strong>2</strong> for UTF-16BE.
2970
   *                   </p>
2971
   */
2972 5 View Code Duplication
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    // only input that "UTF8::is_binary()" flags as binary is inspected any further
    if (self::is_binary($str) !== true) {
      return false;
    }

    $maybeUTF16LE = self::count_encoding_round_trip_hits($str, 'UTF-16LE');
    $maybeUTF16BE = self::count_encoding_round_trip_hits($str, 'UTF-16BE');

    // the same score for both byte orders means: no decision is possible
    if ($maybeUTF16BE === $maybeUTF16LE) {
      return false;
    }

    return ($maybeUTF16LE > $maybeUTF16BE) ? 1 : 2;
  }

  /**
   * Check if the string is UTF-32.
   *
   * @param string $str
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it's not UTF-32,<br />
   *                   <strong>1</strong> for UTF-32LE,<br />
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    // only input that "UTF8::is_binary()" flags as binary is inspected any further
    if (self::is_binary($str) !== true) {
      return false;
    }

    $maybeUTF32LE = self::count_encoding_round_trip_hits($str, 'UTF-32LE');
    $maybeUTF32BE = self::count_encoding_round_trip_hits($str, 'UTF-32BE');

    // the same score for both byte orders means: no decision is possible
    if ($maybeUTF32BE === $maybeUTF32LE) {
      return false;
    }

    return ($maybeUTF32LE > $maybeUTF32BE) ? 1 : 2;
  }

  /**
   * Shared probe of "UTF8::is_utf16()" and "UTF8::is_utf32()".
   *
   * Converts "$str" from "$encoding" to UTF-8, back to "$encoding" and to UTF-8 again. If both
   * UTF-8 results are identical, every key of "UTF8::count_chars()" for the converted text is
   * looked up (strict "in_array()") in the result of "UTF8::count_chars()" for the raw input,
   * and the hits are counted.
   *
   * @param string $str      <p>The raw input string (BOM already removed by the caller).</p>
   * @param string $encoding <p>The candidate encoding, e.g. "UTF-16LE" or "UTF-32BE".</p>
   *
   * @return int <p>The number of hits; 0 if the conversion result is empty or the round trip is not stable.</p>
   */
  private static function count_encoding_round_trip_hits($str, $encoding)
  {
    $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $encoding);
    if (!$asUtf8) {
      return 0;
    }

    $backInEncoding = \mb_convert_encoding($asUtf8, $encoding, 'UTF-8');
    $asUtf8Again = \mb_convert_encoding($backInEncoding, 'UTF-8', $encoding);
    if ($asUtf8Again !== $asUtf8) {
      return 0;
    }

    $hits = 0;
    $strChars = self::count_chars($str, true);
    foreach (self::count_chars($asUtf8Again, true) as $char => $unused) {
      if (in_array($char, $strChars, true) === true) {
        $hits++;
      }
    }

    return $hits;
  }
3080
3081
  /**
3082
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
3083
   *
3084
   * @see    http://hsivonen.iki.fi/php-utf8/
3085
   *
3086
   * @param string $str    <p>The string to be checked.</p>
3087
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
3088
   *
3089
   * @return bool
3090
   */
3091 60
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    // an empty string is valid UTF-8
    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // INFO: There is no PCRE based shortcut here. The byte-wise state machine below
    // does not need the "/u" modifier, so it also works on PCRE builds without UTF-8 support.

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // the length is needed in bytes, also if "mbstring.func_overload" is active
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    $mState = 0; // number of octets that are still expected for the current sequence
    $mUcs4 = 0;  // code point that is assembled from the current sequence
    $mBytes = 1; // total number of octets of the current sequence

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);

      if ($mState === 0) {
        // Start of a new character: either US-ASCII or the first octet
        // of a multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = ($in & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = ($in & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = ($in & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
           *
           * This is illegal because the encoded codepoint must be either
           * (a) not the shortest form or
           * (b) outside the Unicode range of 0-0x10FFFF.
           * Rather than trying to resynchronize, we will carry on until the end
           * of the sequence and let the later error handling code catch it.
           */
          $mUcs4 = ($in & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = ($in & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          // Current octet is neither in the US-ASCII range nor a legal first
          // octet of a multi-octet sequence.
          return false;
        }

        continue;
      }

      // Inside of a multi-octet sequence only a continuation octet (10xxxxxx) is legal.
      if (0x80 !== (0xC0 & $in)) {
        // Incomplete multi-octet sequence.
        return false;
      }

      // Legal continuation: merge its six payload bits into the code point.
      $shift = ($mState - 1) * 6;
      $mUcs4 |= ($in & 0x0000003F) << $shift;

      if (0 === --$mState) {
        // End of the multi-octet sequence, "$mUcs4" now contains the final code point:
        // check for illegal sequences and code points.
        if (
            // From Unicode 3.1, non-shortest form is illegal
            (2 === $mBytes && $mUcs4 < 0x0080) ||
            (3 === $mBytes && $mUcs4 < 0x0800) ||
            (4 === $mBytes && $mUcs4 < 0x10000) ||
            (4 < $mBytes) ||
            // From Unicode 3.2, surrogate characters are illegal.
            (($mUcs4 & 0xFFFFF800) === 0xD800) ||
            // Code points outside the Unicode range are illegal.
            ($mUcs4 > 0x10FFFF)
        ) {
          return false;
        }

        // reset the cache for the next character
        $mState = 0;
        $mUcs4 = 0;
        $mBytes = 1;
      }
    }

    // A string that ends in the middle of a multi-octet sequence (e.g. a lone "\xC3")
    // is truncated and therefore not valid UTF-8.
    return $mState === 0;
  }
3232
3233
  /**
3234
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3235
   * Decodes a JSON string
3236
   *
3237
   * @link http://php.net/manual/en/function.json-decode.php
3238
   *
3239
   * @param string $json    <p>
3240
   *                        The <i>json</i> string being decoded.
3241
   *                        </p>
3242
   *                        <p>
3243
   *                        This function only works with UTF-8 encoded strings.
3244
   *                        </p>
3245
   *                        <p>PHP implements a superset of
3246
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3247
   *                        only supports these values when they are nested inside an array or an object.
3248
   *                        </p>
3249
   * @param bool   $assoc   [optional] <p>
3250
   *                        When <b>TRUE</b>, returned objects will be converted into
3251
   *                        associative arrays.
3252
   *                        </p>
3253
   * @param int    $depth   [optional] <p>
3254
   *                        User specified recursion depth.
3255
   *                        </p>
3256
   * @param int    $options [optional] <p>
3257
   *                        Bitmask of JSON decode options. Currently only
3258
   *                        <b>JSON_BIGINT_AS_STRING</b>
3259
   *                        is supported (default is to cast large integers as floats)
3260
   *                        </p>
3261
   *
3262
   * @return mixed the value encoded in <i>json</i> in appropriate
3263
   * PHP type. Values true, false and
3264
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3265
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3266
   * <i>json</i> cannot be decoded or if the encoded
3267
   * data is deeper than the recursion limit.
3268
   */
3269 2 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // the input goes through "UTF8::filter()" before it is decoded
    $filtered = (string)self::filter($json);

    // "$options" is only handed to the native function if "Bootup::is_php('5.4')" reports true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3281
3282
  /**
3283
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3284
   * Returns the JSON representation of a value.
3285
   *
3286
   * @link http://php.net/manual/en/function.json-encode.php
3287
   *
3288
   * @param mixed $value   <p>
3289
   *                       The <i>value</i> being encoded. Can be any type except
3290
   *                       a resource.
3291
   *                       </p>
3292
   *                       <p>
3293
   *                       All string data must be UTF-8 encoded.
3294
   *                       </p>
3295
   *                       <p>PHP implements a superset of
3296
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3297
   *                       only supports these values when they are nested inside an array or an object.
3298
   *                       </p>
3299
   * @param int   $options [optional] <p>
3300
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3301
   *                       <b>JSON_HEX_TAG</b>,
3302
   *                       <b>JSON_HEX_AMP</b>,
3303
   *                       <b>JSON_HEX_APOS</b>,
3304
   *                       <b>JSON_NUMERIC_CHECK</b>,
3305
   *                       <b>JSON_PRETTY_PRINT</b>,
3306
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3307
   *                       <b>JSON_FORCE_OBJECT</b>,
3308
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3309
   *                       constants is described on
3310
   *                       the JSON constants page.
3311
   *                       </p>
3312
   * @param int   $depth   [optional] <p>
3313
   *                       Set the maximum depth. Must be greater than zero.
3314
   *                       </p>
3315
   *
3316
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3317
   */
3318 2 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // the value goes through "UTF8::filter()" before it is encoded
    $filtered = self::filter($value);

    // "$depth" is only handed to the native function if "Bootup::is_php('5.5')" reports true
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3330
3331
  /**
3332
   * Makes string's first char lowercase.
3333
   *
3334
   * @param string $str <p>The input string</p>
3335
   * @param string  $encoding  [optional] <p>Set the charset.</p>
3336
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
3337
   *
3338
   * @return string <p>The resulting string</p>
3339
   */
3340 7
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character stays as it is
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $tail = ($tail === false) ? '' : $tail;

    // only the first character is lowercased
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($head, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3355
3356
  /**
   * Alias of "UTF8::lcfirst()": all arguments are passed through unchanged.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3371
3372
  /**
   * Lowercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", each non-empty piece that is not
   * listed in "$exceptions" is passed through "UTF8::lcfirst()", and the pieces are
   * concatenated again without any extra separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be kept as they are (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // only a really empty string short-circuits; "0" is a valid input
    if (!isset($str[0])) {
      return '';
    }

    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach (self::str_to_words($str, $charlist) as $word) {

      // skip empty pieces only; a piece like "0" must survive
      if ((string)$word === '') {
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3421
3422
  /**
   * Strip whitespace or other characters from the beginning of a UTF-8 string.
   *
   * Without a char list, leading characters of the Unicode categories "Z" (separators)
   * and "C" (other / control) are removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; "0" is a valid char list.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Falsy values select the default, except "0" / 0 which is a real char list.
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3445
3446
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the highest code point, or an empty string if there are no code points.</p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // max() must not be called with an empty array (warning on PHP < 8, ValueError on PHP 8)
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3461
3462
  /**
   * Determines the largest value in the list returned by "UTF8::chr_size_list()"
   * for the given string, i.e. the widest character in bytes.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3479
3480
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the internal mbstring encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3495
3496
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>The character with the lowest code point, or an empty string if there are no code points.</p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // min() must not be called with an empty array (warning on PHP < 8, ValueError on PHP 8)
    if (empty($codepoints)) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3511
3512
  /**
   * Alias of "UTF8::normalize_encoding()": both arguments are passed through unchanged.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3528
3529
  /**
   * Normalize the encoding-"name" input.
   *
   * Names that are already "UTF-8" or listed in self::$ICONV_ENCODING are returned as
   * they are. Everything else is uppercased, stripped of all characters except
   * letters, digits and whitespace, and looked up in a table of known aliases; if no
   * alias matches, the uppercased name is returned. Results are cached per input name.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>Returned as-is if "$encoding" is empty, e.g.: UTF-8</p>
   *
   * @return string <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $RESOLVED_NAMES = array();

    if (!$encoding) {
      return $fallback;
    }

    // fast paths: names that need no normalization
    if ('UTF-8' === $encoding || in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($RESOLVED_NAMES[$encoding])) {
      return $RESOLVED_NAMES[$encoding];
    }

    $upperName = strtoupper($encoding);
    $lookupKey = preg_replace('/[^a-zA-Z0-9\s]/', '', $upperName);

    $aliases = array(
        'ISO88591'    => 'ISO-8859-1',
        'ISO8859'     => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'WIN1252'     => 'ISO-8859-1',
        'WINDOWS1252' => 'ISO-8859-1',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    // every alias target is a non-empty string, so isset() is sufficient here
    $normalized = isset($aliases[$lookupKey]) ? $aliases[$lookupKey] : $upperName;

    $RESOLVED_NAMES[$encoding] = $normalized;

    return $normalized;
  }
3586
3587
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    // search / replace lists are derived from the table once and then reused
    static $SEARCH = null;
    static $REPLACE = null;

    if ($SEARCH === null) {
      $SEARCH = array_keys(self::$UTF8_MSWORD);
      $REPLACE = array_values(self::$UTF8_MSWORD);
    }

    return str_replace($SEARCH, $REPLACE, $input);
  }
3612
3613
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a plain
   * space; unless requested otherwise, the entries of
   * self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true (strictly), to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // The cache key is derived from the very same strict check that decides whether
    // NBSP is dropped from the table; otherwise a truthy non-bool argument (e.g. 1)
    // would store a table that still contains NBSP under the "keep NBSP" key.
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = (int)$keepNbsp;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $table = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        unset($table['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($table);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3658
3659
  /**
   * Remove every run of characters matching the POSIX class "[[:space:]]"
   * (evaluated with the "u" modifier) from the string.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace($str)
  {
    $input = (string)$str;

    if ($input === '') {
      return '';
    }

    $stripped = preg_replace('/[[:space:]]+/u', '', $input);

    return (string)$stripped;
  }
3678
3679
  /**
   * Format a number with grouped thousands.
   *
   * If one of the separators is longer than one byte, the number is formatted with the
   * plain "." / "," separators first and both are then swapped in a single pass, so a
   * separator that itself contains "." or "," is never replaced a second time.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated Because this has nothing to do with UTF8. :/
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    if (isset($thousands_sep[1]) || isset($dec_point[1])) {
      // strtr() with an array never re-scans text it has already inserted
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3717
3718
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Only the first (up to) four bytes of the input are inspected; continuation bytes
   * are decoded without being validated.
   *
   * @param string      $chr      <p>The character of which to calculate code point.</p>
   * @param string|null $encoding [optional] <p>Default is UTF-8; other encodings are converted to UTF-8 first.</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 if no byte could be read.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // check again, if it's still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $viaIntl = \IntlChar::ord($chr);
      // a falsy result falls through to the manual decoding below
      if ($viaIntl) {
        return $viaIntl;
      }
    }

    // use static cache, if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // 1-based list of the first (up to) four byte values
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if (0xF0 <= $lead && isset($bytes[4])) {
      // four byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif (0xE0 <= $lead && isset($bytes[3])) {
      // three byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif (0xC0 <= $lead && isset($bytes[2])) {
      // two byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      $codePoint = $lead;
    }

    $CHAR_CACHE[$chr] = $codePoint;

    return $codePoint;
  }
3780
3781
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never places variables in the
   *          current scope; the result is only written to the reference parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or the $result is empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return !($parsed === false || empty($result));
  }
3809
3810
  /**
   * Checks if the "u" modifier, which enables Unicode support in PCRE, is available.
   *
   * An empty pattern with the "u" modifier is matched against an empty string; errors
   * are silenced on purpose and the match result is converted to bool.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    /** @noinspection UsageOfSilenceOperatorInspection */
    $matchResult = @preg_match('//u', '');

    return (bool)$matchResult;
  }
3821
3822
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is resolved to a code point: decimal digit strings are cast to int,
   * other hexadecimal strings go through "UTF8::hex_to_int()", anything else through
   * "UTF8::ord()". An empty array is returned if a boundary is empty or resolves to 0.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    // resolve start first, then end
    $codePoints = array();
    foreach (array($var1, $var2) as $boundary) {

      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $codePoints[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($codePoints[0], $codePoints[1]));
  }
3868
3869
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible (until the string no longer changes).</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // "%uXXXX" sequences are rewritten into hexadecimal HTML entities
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    do {
      $previous = $str;

      $asUtf8 = self::to_utf8($str);
      $entitiesDecoded = self::html_entity_decode($asUtf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entitiesDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3919
3920
  /**
   * Alias of "UTF8::remove_bom()": the argument is passed through unchanged.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3935
3936
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (BOM bytes => byte length) is checked in turn against
   * the start of the string and cut off byte-wise when it matches.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {

      // only a BOM at byte offset 0 counts
      if (0 !== self::strpos($str, $bomString, 0, '8BIT')) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
3963
3964
  /**
   * Removes duplicate occurrences of a string in another string, i.e. every run of
   * directly consecutive repetitions of an item is collapsed into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String (or list of strings) to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    if (is_array($what) === true) {
      /** @noinspection ForeachSourceInspection */
      foreach ($what as $item) {
        // Use the captured group as replacement instead of the raw item: the raw item
        // would be parsed as a replacement template ("$0", "\1", ...) by preg_replace().
        $str = preg_replace('/(' . preg_quote($item, '/') . ')+/', '$1', $str);
      }
    }

    return $str;
  }
3987
3988
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * The replacement is repeated until nothing matches any more, so sequences that only
   * appear after a previous removal are caught as well.
   *
   * @param string $str
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form ("%00" ...) of these characters.</p>
   * @param string $replacement [optional] <p>Replacement for every removed sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // every control character except newline (dec 10),
    // carriage return (dec 13) and horizontal tab (dec 09)
    if ($url_encoded) {
      // hex digits of a percent-encoding may be upper or lower case => "i" modifier
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4021
4022
  /**
   * Replace the diamond question mark (�) and invalid-UTF8 chars with the replacement.
   *
   * With "$processInvalidUtf8" the string is converted from UTF-8 to UTF-8 via mbstring
   * while the mbstring substitute character is temporarily set to the replacement
   * ("none" for an empty replacement); the previous substitute character is restored
   * afterwards.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    <p>The replacement character.</p>
   * @param bool   $processInvalidUtf8 <p>Convert invalid UTF-8 chars </p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($processInvalidUtf8 === true) {
      $substitute = ($replacementChar === '') ? 'none' : $replacementChar;

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      $previousSubstitute = \mb_substitute_character();
      \mb_substitute_character($substitute);
      /** @noinspection CallableParameterUseCaseInTypeContextInspection */
      $str = \mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($previousSubstitute);
    }

    $diamonds = array(
        "\xEF\xBF\xBD",
        '�',
    );

    return str_replace($diamonds, $replacementChar, $str);
  }
4068
4069
  /**
   * Strip whitespace or other characters from the end of a UTF-8 string.
   *
   * Without a char list, trailing characters of the Unicode categories "Z" (separators)
   * and "C" (other / control) are removed.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; "0" is a valid char list.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    //
    // Falsy values select the default, except "0" / 0 which is a real char list.
    if ($chars === INF || (!$chars && $chars !== '0' && $chars !== 0)) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4092
4093
  /**
   * Build a regex fragment that matches any single item of the given string.
   *
   * The string is split via "UTF8::str_split()": a "-" is moved to the front of the
   * character class, items shorter than three bytes are added in "preg_quote()"-ed
   * form, longer items that count as one character are added unquoted, and everything
   * else becomes an alternative next to the character class. Results are cached by
   * the concatenation of both arguments.
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the character class.</p>
   *
   * @return string <p>"[...]" or, with additional alternatives, "(?:[...]|...)".</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // index 0 holds the character class body, further entries are alternatives
    $parts = array($class);

    foreach (self::str_split($s) as $item) {

      if ('-' === $item) {
        // keep the dash first, so it cannot form a range
        $parts[0] = '-' . $parts[0];
        continue;
      }

      if (!isset($item[2])) {
        $parts[0] .= preg_quote($item, '/');
        continue;
      }

      if (1 === self::strlen($item)) {
        $parts[0] .= $item;
        continue;
      }

      $parts[] = $item;
    }

    if ($parts[0]) {
      $parts[0] = '[' . $parts[0] . ']';
    }

    $pattern = (1 === count($parts)) ? $parts[0] : '(?:' . implode('|', $parts) . ')';

    $RX_CLASSS_CACHE[$cacheKey] = $pattern;

    return $pattern;
  }
4141
4142
  /**
   * WARNING: Print native UTF-8 support (libs), e.g. for debugging.
   *
   * Echoes one "key - value" line per entry of self::$SUPPORT; boolean values are
   * printed as "true" / "false" so that disabled features do not show up as empty lines.
   */
  public static function showSupport()
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    foreach (self::$SUPPORT as $feature => $supported) {

      if ($supported === true) {
        $rendered = 'true';
      } elseif ($supported === false) {
        $rendered = 'false';
      } else {
        $rendered = $supported;
      }

      echo $feature . ' - ' . $rendered . "\n<br>";
    }
  }
4155
4156
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to return ASCII input unchanged.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $codePoint = self::ord($char, $encoding);

    return '&#' . $codePoint . ';';
  }
4187
4188
  /**
4189
   * Convert a string to an array of Unicode characters.
4190
   *
4191
   * @param string  $str       <p>The string to split into array.</p>
4192
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
4193
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
4194
   *
4195
   * @return string[] <p>An array containing chunks of the string.</p>
4196
   */
4197 39
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // init
    $ret = array();

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    if (self::$SUPPORT['pcre_utf8'] === true) {

      // "u" makes "." match whole code points, "s" lets it match new-lines too
      preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk the raw bytes (the support check was already done above)

      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $len = \mb_strlen($str, '8BIT');
      } else {
        $len = strlen($str);
      }

      /** @noinspection ForeachInvariantsInspection */
      for ($i = 0; $i < $len; $i++) {
        $lead = $str[$i];

        // single byte (ASCII)
        if (($lead & "\x80") === "\x00") {
          $ret[] = $lead;
          continue;
        }

        // the lead byte announces how many bytes the sequence has
        if (($lead & "\xE0") === "\xC0") {
          $seqLength = 2;
        } elseif (($lead & "\xF0") === "\xE0") {
          $seqLength = 3;
        } elseif (($lead & "\xF8") === "\xF0") {
          $seqLength = 4;
        } else {
          // not a valid lead byte (e.g. a stray continuation byte): skip it
          continue;
        }

        // truncated sequence at the end of the string: skip the lead byte
        if (!isset($str[$i + $seqLength - 1])) {
          continue;
        }

        // collect the continuation bytes, every one must look like 10xxxxxx
        // (built via string offsets, so an overloaded substr() can not interfere)
        $char = $lead;
        for ($j = 1; $j < $seqLength; $j++) {
          if (($str[$i + $j] & "\xC0") !== "\x80") {
            $char = null;
            break;
          }
          $char .= $str[$i + $j];
        }

        // an invalid sequence only drops its lead byte, the following bytes are re-examined
        if ($char !== null) {
          $ret[] = $char;
          $i += $seqLength - 1;
        }
      }
    }

    if ($length > 1) {
      $ret = array_chunk($ret, $length);

      return array_map(
          function ($item) {
            return implode('', $item);
          }, $ret
      );
    }

    /** @noinspection OffsetOperationsInspection */
    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4312
4313
  /**
4314
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
4315
   *
4316
   * @param string $str <p>The input string.</p>
4317
   *
4318
   * @return false|string <p>
4319
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br />
4320
   *                      otherwise it will return false.
4321
   *                      </p>
4322
   */
4323 12
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // every detector runs only once, its result decides between LE (1) and BE (2)
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //

    // an encoding is accepted if converting the string to itself leaves it unchanged
    $md5 = md5($str);
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      if (md5(@\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str)) === $md5) {
        return $encodingTmp;
      }
    }

    return false;
  }
4413
4414
  /**
4415
   * Check if the string ends with the given substring.
4416
   *
4417
   * @param string $haystack <p>The string to search in.</p>
4418
   * @param string $needle   <p>The substring to search for.</p>
4419
   *
4420
   * @return bool
4421
   */
4422 2
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never matches
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // cut as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $tail === $needle;
  }
4442
4443
  /**
4444
   * Check if the string ends with the given substring, case insensitive.
4445
   *
4446
   * @param string $haystack <p>The string to search in.</p>
4447
   * @param string $needle   <p>The substring to search for.</p>
4448
   *
4449
   * @return bool
4450
   */
4451 2 View Code Duplication
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // take as many characters from the end of the haystack as the needle is long
    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // "substr()" may report an error via "false": never pass that on to "strcasecmp()"
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4466
4467
  /**
4468
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
4469
   *
4470
   * @link  http://php.net/manual/en/function.str-ireplace.php
4471
   *
4472
   * @param mixed $search  <p>
4473
   *                       Every replacement with search array is
4474
   *                       performed on the result of previous replacement.
4475
   *                       </p>
4476
   * @param mixed $replace <p>
4477
   *                       </p>
4478
   * @param mixed $subject <p>
4479
   *                       If subject is an array, then the search and
4480
   *                       replace is performed with every entry of
4481
   *                       subject, and the return value is an array as
4482
   *                       well.
4483
   *                       </p>
4484
   * @param int   $count   [optional] <p>
4485
   *                       The number of matched and replaced needles will
4486
   *                       be returned in count which is passed by
4487
   *                       reference.
4488
   *                       </p>
4489
   *
4490
   * @return mixed <p>A string or an array of replacements.</p>
4491
   */
4492 26
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    $search = (array)$search;

    // turn every search term into a case-insensitive UTF-8 pattern
    foreach ($search as &$s) {
      $s = (string)$s;

      if ($s === '') {
        // an empty needle must not replace anything: "^" followed by a
        // look-behind for one character can never match
        $s = '/^(?<=.)$/';
      } else {
        $s = '/' . preg_quote($s, '/') . '/ui';
      }
    }
    unset($s); // break the reference, so "$s" can not modify "$search" later on

    // the replacement is plain text: escape "\" and "$", otherwise "preg_replace()"
    // would treat e.g. "$10" or "\1" as a back-reference
    if (is_array($replace)) {
      foreach ($replace as &$r) {
        $r = addcslashes((string)$r, '\\$');
      }
      unset($r);
    } else {
      $replace = addcslashes((string)$replace, '\\$');
    }

    // "$count" is filled by reference with the number of replacements
    return preg_replace($search, $replace, $subject, -1, $count);
  }
4510
4511
  /**
4512
   * Check if the string starts with the given substring, case insensitive.
4513
   *
4514
   * @param string $haystack <p>The string to search in.</p>
4515
   * @param string $needle   <p>The substring to search for.</p>
4516
   *
4517
   * @return bool
4518
   */
4519 2 View Code Duplication
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never matches
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the haystack starts with the needle if its first (case-insensitive) hit is at position 0
    return self::stripos($haystack, $needle) === 0;
  }
4534
4535
  /**
4536
   * Limit the number of characters in a string, but also after the next word.
4537
   *
4538
   * @param string $str
4539
   * @param int    $length
4540
   * @param string $strAddOn
4541
   *
4542
   * @return string
4543
   */
4544 1
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $length = (int)$length;

    // short enough: nothing to cut
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit falls directly on a space: cut right before it
    $lastChar = self::substr($str, $length - 1, 1);
    if ($lastChar === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // otherwise drop the word that was cut in the middle
    $cut = (string)self::substr($str, 0, $length);
    $words = explode(' ', $cut);
    $wholeWords = implode(' ', array_slice($words, 0, -1));

    if ($wholeWords !== '') {
      return $wholeWords . $strAddOn;
    }

    // no complete word left: fall back to a hard cut
    return (string)self::substr($cut, 0, $length - 1) . $strAddOn;
  }
4575
4576
  /**
4577
   * Pad a UTF-8 string to given length with another string.
4578
   *
4579
   * @param string $str        <p>The input string.</p>
4580
   * @param int    $pad_length <p>The length of return string.</p>
4581
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
4582
   * @param int    $pad_type   [optional] <p>
4583
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
4584
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
4585
   *                           </p>
4586
   *
4587
   * @return string <strong>Returns the padded string</strong>
4588
   */
4589 2
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) === true
        &&
        $pad_length > 0
        &&
        $pad_length >= $str_length
    ) {
      $ps_length = self::strlen($pad_string);

      // an empty pad string can not fill anything and would cause a division by zero below
      if (!$ps_length) {
        return $str;
      }

      // number of characters that are missing to reach the requested length
      $diff = $pad_length - $str_length;

      switch ($pad_type) {
        case STR_PAD_LEFT:
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $pre = (string)self::substr($pre, 0, $diff);
          $post = '';
          break;

        case STR_PAD_BOTH:
          // the left side gets the rounded down half, the right side the rounded up half
          $pre = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $pre = (string)self::substr($pre, 0, (int)floor($diff / 2));
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length / 2));
          $post = (string)self::substr($post, 0, (int)ceil($diff / 2));
          break;

        case STR_PAD_RIGHT:
        default:
          $post = str_repeat($pad_string, (int)ceil($diff / $ps_length));
          $post = (string)self::substr($post, 0, $diff);
          $pre = '';
      }

      return $pre . $str . $post;
    }

    // invalid or too small pad length: return the input unchanged
    return $str;
  }
4630
4631
  /**
4632
   * Repeat a string.
4633
   *
4634
   * @param string $str        <p>
4635
   *                           The string to be repeated.
4636
   *                           </p>
4637
   * @param int    $multiplier <p>
4638
   *                           Number of time the input string should be
4639
   *                           repeated.
4640
   *                           </p>
4641
   *                           <p>
4642
   *                           multiplier has to be greater than or equal to 0.
4643
   *                           If the multiplier is set to 0, the function
4644
   *                           will return an empty string.
4645
   *                           </p>
4646
   *
4647
   * @return string <p>The repeated string.</p>
4648
   */
4649 1
  public static function str_repeat($str, $multiplier)
  {
    // the input is passed through "filter()" before the native function repeats it
    return str_repeat(self::filter($str), $multiplier);
  }
4655
4656
  /**
4657
   * INFO: This is only a wrapper for "str_replace()" -> the original function is already UTF-8 safe.
4658
   *
4659
   * Replace all occurrences of the search string with the replacement string
4660
   *
4661
   * @link http://php.net/manual/en/function.str-replace.php
4662
   *
4663
   * @param mixed $search  <p>
4664
   *                       The value being searched for, otherwise known as the needle.
4665
   *                       An array may be used to designate multiple needles.
4666
   *                       </p>
4667
   * @param mixed $replace <p>
4668
   *                       The replacement value that replaces found search
4669
   *                       values. An array may be used to designate multiple replacements.
4670
   *                       </p>
4671
   * @param mixed $subject <p>
4672
   *                       The string or array being searched and replaced on,
4673
   *                       otherwise known as the haystack.
4674
   *                       </p>
4675
   *                       <p>
4676
   *                       If subject is an array, then the search and
4677
   *                       replace is performed with every entry of
4678
   *                       subject, and the return value is an array as
4679
   *                       well.
4680
   *                       </p>
4681
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
4682
   *
4683
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
4684
   */
4685 12
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation, "$count" is filled by reference by the native function
    $result = str_replace($search, $replace, $subject, $count);

    return $result;
  }
4689
4690
  /**
4691
   * Replace the first "$search"-term with the "$replace"-term.
4692
   *
4693
   * @param string $search
4694
   * @param string $replace
4695
   * @param string $subject
4696
   *
4697
   * @return string
4698
   */
4699 1
  public static function str_replace_first($search, $replace, $subject)
  {
    $position = self::strpos($subject, $search);

    // search term not found: nothing to replace
    if ($position === false) {
      return $subject;
    }

    // swap exactly the characters of the first hit
    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
4709
4710
  /**
4711
   * Shuffles all the characters in the string.
4712
   *
4713
   * @param string $str <p>The input string</p>
4714
   *
4715
   * @return string <p>The shuffled string.</p>
4716
   */
4717 1
  public static function str_shuffle($str)
  {
    // shuffle whole characters, not single bytes
    $chars = self::split($str);
    shuffle($chars);

    return implode('', $chars);
  }
4725
4726
  /**
4727
   * Sort all characters according to code points.
4728
   *
4729
   * @param string $str    <p>A UTF-8 string.</p>
4730
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
4731
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
4732
   *
4733
   * @return string <p>String of sorted characters.</p>
4734
   */
4735 1
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice leaves exactly one entry per distinct code point
      $codePoints = array_flip(array_flip($codePoints));
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    return self::string($codePoints);
  }
4751
4752
  /**
4753
   * Split a string into an array.
4754
   *
4755
   * @param string $str
4756
   * @param int    $len
4757
   *
4758
   * @return array
4759
   */
4760 22
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    $len = (int)$len;

    // an invalid chunk length is handed to the native function as-is
    if ($len < 1) {
      return str_split($str, $len);
    }

    // split the string into grapheme clusters
    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $clusters = $matches[0];

    if ($len === 1) {
      return $clusters;
    }

    // glue "$len" clusters together per output element
    $chunks = array();
    $chunkIndex = -1;

    foreach ($clusters as $position => $cluster) {
      if ($position % $len) {
        $chunks[$chunkIndex] .= $cluster;
      } else {
        // every "$len"-th cluster opens a new chunk
        $chunks[++$chunkIndex] = $cluster;
      }
    }

    return $chunks;
  }
4796
4797
  /**
4798
   * Check if the string starts with the given substring.
4799
   *
4800
   * @param string $haystack <p>The string to search in.</p>
4801
   * @param string $needle   <p>The substring to search for.</p>
4802
   *
4803
   * @return bool
4804
   */
4805 2 View Code Duplication
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // an empty haystack or an empty needle never matches
    if ($haystack === '' || $needle === '') {
      return false;
    }

    // the haystack starts with the needle if its first hit is at position 0
    return self::strpos($haystack, $needle) === 0;
  }
4820
4821
  /**
4822
   * Get a binary representation of a specific string.
4823
   *
4824
   * @param string $str <p>The input string.</p>
4825
   *
4826
   * @return string
4827
   */
4828 1
  public static function str_to_binary($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '0';
    }

    // Convert byte by byte: "base_convert()" loses precision on large numbers,
    // so converting the whole string at once breaks for longer inputs.
    $bits = '';
    foreach (unpack('C*', $str) as $byte) {
      $bits .= sprintf('%08b', $byte);
    }

    // The result is the binary form of one big number, hence no leading zeros
    // (same output as before for short strings).
    $bits = ltrim($bits, '0');

    return $bits === '' ? '0' : $bits;
  }
4836
4837
  /**
4838
   * Convert a string into an array of words.
4839
   *
4840
   * @param string   $str
4841
   * @param string   $charList <p>Additional chars for the definition of "words".</p>
4842
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
4843
   * @param null|int $removeShortValues
4844
   *
4845
   * @return array
4846
   */
4847 10
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    if ($removeShortValues !== null) {
      $removeShortValues = (int)$removeShortValues;
    }

    // empty input: either nothing at all or one empty element
    if ($str === '') {
      return $removeEmptyValues === true ? array() : array('');
    }

    $charList = self::rxClass($charList, '\pL');

    // the captured delimiters are the words, so words and the text between them alternate
    $parts = \preg_split("/({$charList}+(?:[\p{Pd}’']{$charList}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested: return the raw split result
    if ($removeShortValues === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      // drop parts that are not longer than the given limit
      if ($removeShortValues !== null && self::strlen($part) <= $removeShortValues) {
        continue;
      }

      // drop parts that only consist of whitespace
      if ($removeEmptyValues === true && trim($part) === '') {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4898
4899
  /**
4900
   * alias for "UTF8::to_ascii()"
4901
   *
4902
   * @see UTF8::to_ascii()
4903
   *
4904
   * @param string $str
4905
   * @param string $unknown
4906
   * @param bool   $strict
4907
   *
4908
   * @return string
4909
   */
4910 7
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4914
4915
  /**
4916
   * Counts number of words in the UTF-8 string.
4917
   *
4918
   * @param string $str      <p>The input string.</p>
4919
   * @param int    $format   [optional] <p>
4920
   *                         <strong>0</strong> => return a number of words (default)<br />
4921
   *                         <strong>1</strong> => return an array of words<br />
4922
   *                         <strong>2</strong> => return an array of words with word-offset as key
4923
   *                         </p>
4924
   * @param string $charlist [optional] <p>Additional chars that belong to words and do not start a new word.</p>
4925
   *
4926
   * @return array|int <p>The number of words in the string</p>
4927
   */
4928 1
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words sit at the odd indexes, the text between them at the even ones
    $parts = self::str_to_words($str, $charlist);
    $total = count($parts);

    // every format except 1 and 2 only returns the number of words
    if ($format !== 1 && $format !== 2) {
      return ($total - 1) / 2;
    }

    $words = array();

    if ($format === 1) {
      for ($i = 1; $i < $total; $i += 2) {
        $words[] = $parts[$i];
      }

      return $words;
    }

    // format 2: use the character offset of each word as its key
    $offset = self::strlen($parts[0]);
    for ($i = 1; $i < $total; $i += 2) {
      $words[$offset] = $parts[$i];
      $offset += self::strlen($parts[$i]) + self::strlen($parts[$i + 1]);
    }

    return $words;
  }
4958
4959
  /**
4960
   * Case-insensitive string comparison.
4961
   *
4962
   * INFO: Case-insensitive version of UTF8::strcmp()
4963
   *
4964
   * @param string $str1
4965
   * @param string $str2
4966
   *
4967
   * @return int <p>
4968
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
4969
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
4970
   *             <strong>0</strong> if they are equal.
4971
   *             </p>
4972
   */
4973 11
  public static function strcasecmp($str1, $str2)
  {
    // case-fold both sides first, then compare them case-sensitively
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
4977
4978
  /**
4979
   * alias for "UTF8::strstr()"
4980
   *
4981
   * @see UTF8::strstr()
4982
   *
4983
   * @param string  $haystack
4984
   * @param string  $needle
4985
   * @param bool    $before_needle
4986
   * @param string  $encoding
4987
   * @param boolean $cleanUtf8
4988
   *
4989
   * @return string|false
4990
   */
4991 1
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $result = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
4995
4996
  /**
4997
   * Case-sensitive string comparison.
4998
   *
4999
   * @param string $str1
5000
   * @param string $str2
5001
   *
5002
   * @return int  <p>
5003
   *              <strong>&lt; 0</strong> if str1 is less than str2<br />
5004
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br />
5005
   *              <strong>0</strong> if they are equal.
5006
   *              </p>
5007
   */
5008 14
  public static function strcmp($str1, $str2)
  {
    // identical strings need no normalization
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    // compare the canonically decomposed (NFD) forms byte-wise
    /** @noinspection PhpUndefinedClassInspection */
    $left = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $right = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($left, $right);
  }
5016
5017
  /**
5018
   * Find length of initial segment not matching mask.
5019
   *
5020
   * @param string $str
5021
   * @param string $charList
5022
   * @param int    $offset
5023
   * @param int    $length
5024
   *
5025
   * @return int|null
5026
   */
5027 15
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = $charList . '';

    // without a mask there is nothing to measure
    if ($charList === '') {
      return null;
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if ($str === '') {
      return null;
    }

    // lazily capture everything in front of the first character from the mask
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (!preg_match($pattern, $str, $matches)) {
      // no mask character found: the whole string is the initial segment
      return self::strlen($str);
    }

    /** @noinspection OffsetOperationsInspection */
    return self::strlen($matches[1]);
  }
5053
5054
  /**
5055
   * alias for "UTF8::stristr()"
5056
   *
5057
   * @see UTF8::stristr()
5058
   *
5059
   * @param string  $haystack
5060
   * @param string  $needle
5061
   * @param bool    $before_needle
5062
   * @param string  $encoding
5063
   * @param boolean $cleanUtf8
5064
   *
5065
   * @return string|false
5066
   */
5067 1
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // pure alias: all arguments are forwarded unchanged
    $result = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $result;
  }
5071
5072
  /**
5073
   * Create a UTF-8 string from code points.
5074
   *
5075
   * INFO: opposite to UTF8::codepoints()
5076
   *
5077
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
5078
   *
5079
   * @return string <p>UTF-8 encoded string.</p>
5080
   */
5081 2
  public static function string(array $array)
  {
    $result = '';

    // convert the code points one after another and append them in order
    foreach ($array as $codePoint) {
      $result .= self::chr($codePoint);
    }

    return $result;
  }
5094
5095
  /**
5096
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
5097
   *
5098
   * @param string $str <p>The input string.</p>
5099
   *
5100
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
5101
   */
5102 3
  public static function string_has_bom($str)
  {
    // the byte sequences are the keys of the BOM table, the values are not needed here
    foreach (array_keys(self::$BOM) as $bom) {
      if (strpos($str, $bom) === 0) {
        return true;
      }
    }

    return false;
  }
5112
5113
  /**
5114
   * Strip HTML and PHP tags from a string + clean invalid UTF-8.
5115
   *
5116
   * @link http://php.net/manual/en/function.strip-tags.php
5117
   *
5118
   * @param string  $str            <p>
5119
   *                                The input string.
5120
   *                                </p>
5121
   * @param string  $allowable_tags [optional] <p>
5122
   *                                You can use the optional second parameter to specify tags which should
5123
   *                                not be stripped.
5124
   *                                </p>
5125
   *                                <p>
5126
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
5127
   *                                can not be changed with allowable_tags.
5128
   *                                </p>
5129
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
5130
   *
5131
   * @return string <p>The stripped string.</p>
5132
   */
5133 2 View Code Duplication
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // optionally clean the input before the tags are stripped
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    return strip_tags($input, $allowable_tags);
  }
5147
5148
  /**
   * Case-insensitive search for the first occurrence of a string inside another string.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string that is searched.</p>
   * @param string  $needle    <p>The string to look for in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack at which the search starts.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass haystack and needle through self::clean() first.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in haystack,<br />
   *                   or false if haystack or needle is empty or needle is not found.
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    // both strings need at least one byte
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5208
5209
  /**
   * Returns all of haystack starting from and including the first case-insensitive
   * occurrence of needle to the end.
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle is returned (excluding the needle).
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Pass haystack and needle through self::clean() first.</p>
   *
   * @return false|string A sub-string,<br />or <strong>false</strong> if haystack or needle is empty
   *                      or needle is not found.
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The needle can only be empty here if cleaning removed all of it.
    // Compare against '' instead of using "!$needle", otherwise the valid
    // needle "0" would be treated as empty and the whole haystack returned.
    if ((string)$needle === '') {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII input can be handled by the native byte-based function
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: lazily capture everything in front of the
    // first case-insensitive match of the (quoted) needle
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip the characters in front of the match and return the rest
    return self::substr($haystack, self::strlen($match[1]));
  }
5290
5291
  /**
   * Count the characters of a string (characters, not bytes).
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string whose length is measured.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through self::clean() first.</p>
   *
   * @return int <p>The number of characters in $str for the given $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // "ASCII" and "CP850" are answered with a byte count, before any cleaning
    switch ($encoding) {
      case 'ASCII':
      case 'CP850':
        $nativeIsByteSafe = $encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false;

        return $nativeIsByteSafe ? strlen($str) : \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // backends in order of preference: mbstring, iconv, intl (UTF-8 only)
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strlen()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $chars);
    $charCount = count($chars[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5397
5398
  /**
   * Compare two strings case-insensitively using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are passed through self::strtocasefold() and then handed to self::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5414
5415
  /**
   * Compare two strings using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br />
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5433
5434
  /**
   * Compare the first n characters of two strings, ignoring case.
   *
   * Both strings are passed through self::strtocasefold() and then handed to self::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5451
5452
  /**
   * Compare the first n characters of two strings.
   *
   * Each string is cut with self::substr() and the two pieces are compared via self::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br />
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br />
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // the string cast turns a non-string result of self::substr() into a string
    $head1 = (string)self::substr($str1, 0, $len);
    $head2 = (string)self::substr($str2, 0, $len);

    return self::strcmp($head1, $head2);
  }
5472
5473
  /**
   * Search a string for any character of a given set.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false The rest of haystack starting at the first character found,
   *                      or false if haystack or char_list is empty or no character is found.
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // match the first character that belongs to the class built from $char_list
    if (!preg_match('/' . self::rxClass($char_list) . '/us', $haystack, $found)) {
      return false;
    }

    // locate the match and cut with the same pair of native string functions
    $start = strpos($haystack, $found[0]);

    return substr($haystack, $start);
  }
5498
5499
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Pass haystack and needle through self::clean() first.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br />
   *                   If haystack or needle is empty, or needle is not found, it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: a non-negative integer
    // is converted via self::chr(). This has to happen before the string cast
    // below, otherwise the check can never match.
    if (is_int($needle) && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only when neither extension is available to handle the encoding
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // an offset counted from the end is turned into one counted from the
      // start, so the result stays relative to the beginning of the haystack
      $offset = max(0, self::strlen($haystack) + $offset);
    }

    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    // a false result of the cut becomes an empty string
    $haystackTmp = (string)$haystackTmp;

    $pos = strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // $pos comes from the native strpos(): measure the part in front of the
    // match with self::strlen() and re-add the skipped offset
    return $offset + self::strlen(substr($haystackTmp, 0, $pos));
  }
5644
5645
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              If set to true, all of haystack from the beginning
   *                              to the last occurrence of needle is returned.
   *                              If set to false, all of haystack from the last
   *                              occurrence of needle to the end is returned.
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Pass haystack and needle through self::clean() first.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strrchr($haystack, $needle, $before_needle, $charset);
  }
5684
5685
  /**
   * Reverse the order of the characters in a string.
   *
   * @param string $str The input string
   *
   * @return string The string with its characters in reverse order
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // split via self::split(), flip the resulting list and glue it back together
    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5702
5703
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If set to true, all of haystack from the beginning
   *                               to the last occurrence of needle is returned.
   *                               If set to false, all of haystack from the last
   *                               occurrence of needle to the end is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Pass haystack and needle through self::clean() first.</p>
   *
   * @return string|false <p>The portion of haystack or<br />false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    return \mb_strrichr($haystack, $needle, $before_needle, $charset);
  }
5741
5742
  /**
   * Find the position of the last occurrence of a string, ignoring case.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br />A non-negative int is converted via self::chr().</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Pass haystack and needle through self::clean() first.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br />If
   *                   haystack or needle is empty, or needle is not found, it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted before the string casts below
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $mustClean = $cleanUtf8 === true || $encoding === true;
    if ($mustClean) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: upper-case both sides, then search case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5822
5823
  /**
   * Find the position of the last occurrence of a string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string being checked, for the last occurrence of needle</p>
   * @param string|int $needle    <p>The string to find in haystack.<br />Or a code point as int.</p>
   * @param int        $offset    [optional] <p>May be specified to begin searching an arbitrary number of characters
   *                              into the string. Negative values will stop searching at an arbitrary point prior to
   *                              the end of the string.
   *                              </p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Pass haystack and needle through self::clean() first.</p>
   *
   * @return int|false <p>The numeric position of the last occurrence of needle in the haystack string.<br />If
   *                   haystack or needle is empty, or needle is not found, it returns false.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted before the string casts below
    if ($needle === (int)$needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    $mustClean = $cleanUtf8 === true || $encoding === true;
    if ($mustClean) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      // INFO: the "bool"-check is only a fallback for old versions
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strrpos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    // null means "haystack was not cut"
    $slice = null;
    if ($offset > 0) {
      // positive offset: drop the leading characters, keep $offset to re-add it later
      $slice = self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      // negative offset: drop the trailing characters, positions stay relative to the start
      $slice = self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    if ($slice !== null) {
      $haystack = ($slice === false) ? '' : (string)$slice;
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // measure the part in front of the match with self::strlen() and re-add the offset
    $prefix = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($prefix);
  }
5925
5926
  /**
5927
   * Finds the length of the initial segment of a string consisting entirely of characters contained within a given
5928
   * mask.
5929
   *
5930
   * @param string $str    <p>The input string.</p>
5931
   * @param string $mask   <p>The mask of chars</p>
5932
   * @param int    $offset [optional]
5933
   * @param int    $length [optional]
5934
   *
5935
   * @return int
5936
   */
5937 10
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    // Narrow the subject to the requested window before matching.
    if ($offset || $length !== null) {
      $window = self::substr($str, $offset, $length);
      $str = ($window === false) ? '' : (string)$window;
    }

    $str = (string)$str;

    // Without a subject or a mask there is no initial segment to measure.
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    // Match the longest leading run made up only of mask characters.
    $found = array();
    if (!preg_match('/^' . self::rxClass($mask) . '+/u', $str, $found)) {
      return 0;
    }

    return self::strlen($found[0]);
  }
5954
5955
  /**
5956
   * Returns part of haystack string from the first occurrence of needle to the end of haystack.
5957
   *
5958
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
5959
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
5960
   * @param bool    $before_needle [optional] <p>
5961
   *                               If <b>TRUE</b>, strstr() returns the part of the
5962
   *                               haystack before the first occurrence of the needle (excluding the needle).
5963
   *                               </p>
5964
   * @param string  $encoding      [optional] <p>Set the charset.</p>
5965
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
5966
   *
5967
   * @return string|false A sub-string,<br />or <strong>false</strong> if needle is not found.
5968
   */
5969 2
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // An empty haystack or an empty needle can never produce a match.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // Strip invalid byte sequences from both operands before searching.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Only the mbstring call below receives $encoding, so warn when it is
    // unavailable and a charset other than UTF-8 was requested.
    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // First choice: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    // Second choice: intl's grapheme functions (called without an encoding,
    // so only used for UTF-8).
    $canUseGrapheme = $encoding === 'UTF-8'
                      && self::$SUPPORT['intl'] === true
                      && Bootup::is_php('5.4') === true;
    if ($canUseGrapheme) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // Last resort: capture everything in front of the first needle via regex.
    $found = array();
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/us', $haystack, $found);

    if (!isset($found[1])) {
      return false;
    }

    $prefix = $found[1];

    return $before_needle ? $prefix : self::substr($haystack, self::strlen($prefix));
  }
6027
6028
  /**
6029
   * Unicode transformation for case-less matching.
6030
   *
6031
   * @link http://unicode.org/reports/tr21/tr21-5.html
6032
   *
6033
   * @param string  $str       <p>The input string.</p>
6034
   * @param bool    $full      [optional] <p>
6035
   *                           <b>true</b>, replace full case folding chars (default)<br />
6036
   *                           <b>false</b>, use only limited static array [UTF8::$commonCaseFold]
6037
   *                           </p>
6038
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6039
   *
6040
   * @return string
6041
   */
6042 13
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    // Lookup tables are built on first use and kept for later calls.
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;
    static $fullFoldTable = null;

    $str = (string)$str;
    if (!isset($str[0])) {
      return '';
    }

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    // Step 1: apply the small static folding table.
    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    // Step 2 (optional): apply the full folding table loaded via getData().
    if ($full) {
      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // Step 3: lowercase whatever the folding tables did not cover.
    return self::strtolower($str);
  }
6079
6080
  /**
6081
   * Make a string lowercase.
6082
   *
6083
   * @link http://php.net/manual/en/function.mb-strtolower.php
6084
   *
6085
   * @param string      $str       <p>The string being lowercased.</p>
6086
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
6087
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6088
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6089
   *
6090
   * @return string str with all alphabetic characters converted to lowercase.
6091
   */
6092 25 View Code Duplication
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // Strip invalid byte sequences before converting.
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // No language-specific rules requested: plain multibyte lowercasing.
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // Language-specific lowercasing is done through intl's transliterator;
    // without it, warn and fall back to the generic conversion.
    if (
        self::$SUPPORT['intl'] !== true
        ||
        Bootup::is_php('5.4') !== true
    ) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $langCode = $lang . '-Lower';
    if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // Unknown transliterator id: use the language-neutral one instead.
      $langCode = 'Any-Lower';
    }

    return transliterator_transliterate($langCode, $str);
  }
6137
6138
  /**
6139
   * Generic case sensitive transformation for collation matching.
6140
   *
6141
   * @param string $str <p>The input string</p>
6142
   *
6143
   * @return string
6144
   */
6145 3
  private static function strtonatfold($str)
  {
    // Normalize to form NFD first.
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    // Then remove every run of characters in the Unicode "Mn" category.
    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6150
6151
  /**
6152
   * Make a string uppercase.
6153
   *
6154
   * @link http://php.net/manual/en/function.mb-strtoupper.php
6155
   *
6156
   * @param string      $str       <p>The string being uppercased.</p>
6157
   * @param string      $encoding  [optional] <p>Set the charset.</p>
6158
   * @param boolean     $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6159
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
6160
   *
6161
   * @return string str with all alphabetic characters converted to uppercase.
6162
   */
6163 19 View Code Duplication
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    // An empty string has nothing to convert.
    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // Strip invalid byte sequences before converting.
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // Language-specific uppercasing is done through intl's transliterator.
      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // Unknown transliterator id: use the language-neutral one instead.
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // Fixed: this warning previously named UTF8::strtolower() (copy & paste
      // from the lowercase variant), which pointed users at the wrong method.
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    // Generic conversion (also the fallback when the "lang" path is unavailable).
    return \mb_strtoupper($str, $encoding);
  }
6207
6208
  /**
6209
   * Translate characters or replace sub-strings.
6210
   *
6211
   * @link  http://php.net/manual/en/function.strtr.php
6212
   *
6213
   * @param string          $str  <p>The string being translated.</p>
6214
   * @param string|string[] $from <p>The string replacing from.</p>
6215
   * @param string|string[] $to   <p>The string being translated to.</p>
6216
   *
6217
   * @return string <p>
6218
   *                This function returns a copy of str, translating all occurrences of each character in from to the
6219
   *                corresponding character in to.
6220
   *                </p>
6221
   */
6222 1
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // Identical search and replacement: return the input unchanged.
    if ($from === $to) {
      return $str;
    }

    $map = $from;

    if (INF !== $to) {
      // Three-argument form: split both operands and pair them up by position.
      $searchChars = self::str_split($from);
      $replaceChars = self::str_split($to);

      $searchCount = count($searchChars);
      $replaceCount = count($replaceChars);

      // Trim the longer list so that both have the same number of entries.
      if ($searchCount > $replaceCount) {
        $searchChars = array_slice($searchChars, 0, $replaceCount);
      } elseif ($replaceCount > $searchCount) {
        $replaceChars = array_slice($replaceChars, 0, $searchCount);
      }

      $map = array_combine($searchChars, $replaceChars);
    }

    // A plain string without a replacement is removed from the input;
    // anything else is handed to the native two-argument strtr().
    if (is_string($map)) {
      return str_replace($map, '', $str);
    }

    return strtr($str, $map);
  }
6255
6256
  /**
6257
   * Return the width of a string.
6258
   *
6259
   * @param string  $str       <p>The input string.</p>
6260
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6261
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6262
   *
6263
   * @return int
6264
   */
6265 1
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Anything other than the literal 'UTF-8' goes through the normalizer.
    $charset = ($encoding === 'UTF-8')
        ? $encoding
        : self::normalize_encoding($encoding, 'UTF-8');

    // Optionally strip invalid byte sequences before measuring.
    $subject = ($cleanUtf8 === true) ? self::clean($str) : $str;

    return \mb_strwidth($subject, $charset);
  }
6280
6281
  /**
6282
   * Changes all keys in an array.
6283
   *
6284
   * @param array $array <p>The array to work on</p>
6285
   * @param int $case [optional] <p> Either <strong>CASE_UPPER</strong><br />
6286
   *                  or <strong>CASE_LOWER</strong> (default)</p>
6287
   *
6288
   * @return array|false <p>An array with its keys lower or uppercased, or false if
6289
   *                     input is not an array.</p>
6290
   */
6291 1
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // Every value other than CASE_LOWER (including unknown ones) means uppercase.
    $toLower = ($case === CASE_LOWER);

    $converted = array();
    foreach ($array as $key => $value) {
      $newKey = $toLower ? self::strtolower($key) : self::strtoupper($key);

      // Keys that collide after conversion overwrite earlier entries.
      $converted[$newKey] = $value;
    }

    return $converted;
  }
6318
6319
  /**
6320
   * Get part of a string.
6321
   *
6322
   * @link http://php.net/manual/en/function.mb-substr.php
6323
   *
6324
   * @param string  $str       <p>The string being checked.</p>
6325
   * @param int     $offset    <p>The first position used in str.</p>
6326
   * @param int     $length    [optional] <p>The maximum length of the returned string.</p>
6327
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6328
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6329
   *
6330
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
6331
   *                      <i>length</i> parameters.</p><p>If <i>str</i> is shorter than <i>offset</i>
6332
   *                      characters long, <b>FALSE</b> will be returned.</p>
6333
   */
6334 76
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // An empty input or an explicit zero length always yields an empty string.
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // Strip invalid byte sequences before any backend sees the string.
      $str = self::clean($str);
    }

    // Neither offset nor length given: the whole string is requested.
    if (!$offset && $length === null) {
      return $str;
    }

    // The character count is only needed to validate the offset
    // or to supply the default length.
    $charCount = 0;
    if ($offset || $length === null) {
      $charCount = (int)self::strlen($str, $encoding);
    }

    // An offset beyond the end of the string cannot be satisfied.
    if ($offset && $offset > $charCount) {
      return false;
    }

    // From here on $length is always an integer.
    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: the "bool"-check is only a fallback for old versions
    $isUtf8 = $encoding === 'UTF-8' || $encoding === true || $encoding === false;
    $encoding = $isUtf8 ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // CP850 is sliced with the native byte-wise substr(),
    // unless mbstring function overloading is active.
    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // First choice: mbstring.
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // Second choice: intl's grapheme functions (called without an encoding,
    // so only used for UTF-8).
    $canUseGrapheme = $encoding === 'UTF-8'
                      && self::$SUPPORT['intl'] === true
                      && Bootup::is_php('5.4') === true;
    if ($canUseGrapheme) {
      return \grapheme_substr($str, $offset, $length);
    }

    // Third choice: iconv ("iconv_substr()" can't handle negative length).
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    // Pure ASCII input can be sliced with the native function.
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // Fallback via vanilla PHP: split into characters, take the slice, re-join.
    $chars = self::split($str);

    return implode('', array_slice($chars, $offset, $length));
  }
6441
6442
  /**
6443
   * Binary safe comparison of two strings from an offset, up to length characters.
6444
   *
6445
   * @param string  $str1               <p>The main string being compared.</p>
6446
   * @param string  $str2               <p>The secondary string being compared.</p>
6447
   * @param int     $offset             [optional] <p>The start position for the comparison. If negative, it starts
6448
   *                                    counting from the end of the string.</p>
6449
   * @param int     $length             [optional] <p>The length of the comparison. The default value is the largest of
6450
   *                                    the length of the str compared to the length of main_str less the offset.</p>
6451
   * @param boolean $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
6452
   *                                    insensitive.</p>
6453
   *
6454
   * @return int <p>
6455
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br />
6456
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br />
6457
   *             <strong>0</strong> if they are equal.
6458
   *             </p>
6459
   */
6460 1
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $needsWindow = ($offset !== 0 || $length !== null);

    if ($needsWindow) {
      // Cut the requested window out of the main string ...
      $slice1 = self::substr($str1, $offset, $length);
      $str1 = ($slice1 === false) ? '' : (string)$slice1;

      // ... and limit the second string to the same number of characters.
      $slice2 = self::substr($str2, 0, self::strlen($str1));
      $str2 = ($slice2 === false) ? '' : (string)$slice2;
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6486
6487
  /**
6488
   * Count the number of substring occurrences.
6489
   *
6490
   * @link  http://php.net/manual/en/function.substr-count.php
6491
   *
6492
   * @param string  $haystack  <p>The string to search in.</p>
6493
   * @param string  $needle    <p>The substring to search for.</p>
6494
   * @param int     $offset    [optional] <p>The offset where to start counting.</p>
6495
   * @param int     $length    [optional] <p>
6496
   *                           The maximum length after the specified offset to search for the
6497
   *                           substring. It outputs a warning if the offset plus the length is
6498
   *                           greater than the haystack length.
6499
   *                           </p>
6500
   * @param string  $encoding  <p>Set the charset.</p>
6501
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6502
   *
6503
   * @return int|false <p>This functions returns an integer or false if there isn't a string.</p>
6504
   */
6505 1
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // Counting needs both a haystack and a needle.
    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {
      // Without an explicit length the window runs over the haystack's full length.
      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      $bothNonZero = ($length !== 0 && $offset !== 0);
      if (
          $bothNonZero
          &&
          $length + $offset <= 0
          &&
          Bootup::is_php('7.1') === false // output from "substr_count()" have changed in PHP 7.1
      ) {
        return false;
      }

      // Restrict the haystack to the requested window.
      $window = self::substr($haystack, $offset, $length, $encoding);
      $haystack = ($window === false) ? '' : (string)$window;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // Strip invalid byte sequences from both operands before counting.
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    // Fallback: count the regex matches of the quoted needle.
    $found = array();
    preg_match_all('/' . preg_quote($needle, '/') . '/us', $haystack, $found, PREG_SET_ORDER);

    return count($found);
  }
6576
6577
  /**
6578
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
6579
   *
6580
   * @param string $haystack <p>The string to search in.</p>
6581
   * @param string $needle   <p>The substring to search for.</p>
6582
   *
6583
   * @return string <p>Return the sub-string.</p>
6584
   */
6585 1 View Code Duplication
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // Nothing to strip: empty needle, or the haystack does not start with it.
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle is long.
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6609
6610
  /**
6611
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
6612
   *
6613
   * @param string $haystack <p>The string to search in.</p>
6614
   * @param string $needle   <p>The substring to search for.</p>
6615
   *
6616
   * @return string <p>Return the sub-string.</p>
6617
   */
6618 1 View Code Duplication
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // Nothing to strip: empty needle, or the haystack does not end with it.
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Keep everything except the trailing characters covered by the needle.
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $remainder = self::substr($haystack, 0, $keepLength);

    return ($remainder === false) ? '' : (string)$remainder;
  }
6642
6643
  /**
6644
   * Removes a prefix ($needle) from the start of the string ($haystack).
6645
   *
6646
   * @param string $haystack <p>The string to search in.</p>
6647
   * @param string $needle   <p>The substring to search for.</p>
6648
   *
6649
   * @return string <p>Return the sub-string.</p>
6650
   */
6651 1 View Code Duplication
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // Nothing to strip: empty needle, or the haystack does not start with it.
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // Drop as many leading characters as the needle is long.
    $remainder = self::substr($haystack, self::strlen($needle));

    return ($remainder === false) ? '' : (string)$remainder;
  }
6675
6676
  /**
6677
   * Replace text within a portion of a string.
6678
   *
6679
   * source: https://gist.github.com/stemar/8287074
6680
   *
6681
   * @param string|string[] $str              <p>The input string or an array of strings.</p>
6682
   * @param string|string[] $replacement      <p>The replacement string or an array of strings.</p>
6683
   * @param int|int[]       $offset           <p>
6684
   *                                          If start is positive, the replacing will begin at the start'th offset
6685
   *                                          into string.
6686
   *                                          <br /><br />
6687
   *                                          If start is negative, the replacing will begin at the start'th character
6688
   *                                          from the end of string.
6689
   *                                          </p>
6690
   * @param int|int[]|void  $length           [optional] <p>If given and is positive, it represents the length of the
6691
   *                                          portion of string which is to be replaced. If it is negative, it
6692
   *                                          represents the number of characters from the end of string at which to
6693
   *                                          stop replacing. If it is not given, then it will default to strlen(
6694
   *                                          string ); i.e. end the replacing at the end of string. Of course, if
6695
   *                                          length is zero then this function will have the effect of inserting
6696
   *                                          replacement into string at the given start offset.</p>
6697
   *
6698
   * @return string|string[] <p>The result string is returned. If string is an array then array is returned.</p>
6699
   */
6700 7
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    // Array input: normalize all arguments to per-element lists
    // and process every element through this method again.
    if (is_array($str) === true) {
      $num = count($str);

      // the replacement
      if (is_array($replacement) === true) {
        $replacement = array_slice($replacement, 0, $num);
      } else {
        $replacement = array_pad(array($replacement), $num, $replacement);
      }

      // the offset (non-integer entries fall back to 0)
      if (is_array($offset) === true) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as &$valueTmp) {
          $valueTmp = (int)$valueTmp === $valueTmp ? $valueTmp : 0;
        }
        unset($valueTmp);
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      // the length
      if (!isset($length)) {
        // Fixed: an omitted length used to be filled with 0, which turned the
        // call into an insertion for array input. Passing null through keeps
        // the documented default ("replace up to the end of the string") and
        // matches both the scalar code path below and native substr_replace().
        $length = array_fill(0, $num, null);
      } elseif (is_array($length) === true) {
        $length = array_slice($length, 0, $num);
        foreach ($length as &$valueTmpV2) {
          if (isset($valueTmpV2)) {
            $valueTmpV2 = (int)$valueTmpV2 === $valueTmpV2 ? $valueTmpV2 : $num;
          } else {
            $valueTmpV2 = 0;
          }
        }
        unset($valueTmpV2);
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // recursive call
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // Scalar input with an array replacement: only the first entry is used.
    if (is_array($replacement) === true) {
      if (count($replacement) > 0) {
        $replacement = $replacement[0];
      } else {
        $replacement = '';
      }
    }

    // init
    $str = (string)$str;
    $replacement = (string)$replacement;

    if (!isset($str[0])) {
      return $replacement;
    }

    // Pure ASCII input can be handled by the native function.
    if (self::is_ascii($str)) {
      return ($length === null) ?
          substr_replace($str, $replacement, $offset) :
          substr_replace($str, $replacement, $offset, $length);
    }

    // Split both strings into characters so that offsets count characters, not bytes.
    preg_match_all('/./us', $str, $smatches);
    preg_match_all('/./us', $replacement, $rmatches);

    // No length given: replace everything from the offset to the end.
    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    array_splice($smatches[0], $offset, $length, $rmatches[0]);

    return implode('', $smatches[0]);
  }
6777
6778
  /**
6779
   * Removes a suffix ($needle) from the end of the string ($haystack).
6780
   *
6781
   * @param string $haystack <p>The string to search in.</p>
6782
   * @param string $needle   <p>The substring to search for.</p>
6783
   *
6784
   * @return string <p>Return the sub-string.</p>
6785
   */
6786 1 View Code Duplication
  public static function substr_right($haystack, $needle)
  {
    // Strips $needle from the end of $haystack, if (and only if) $haystack ends with it.
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if ($haystack === '') {
      return '';
    }

    // nothing to strip: empty needle, or the haystack does not end with it
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the suffix (lengths are counted by the UTF8 helpers)
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    return $head === false ? '' : (string)$head;
  }
6809
6810
  /**
6811
   * Returns a case swapped version of the string.
6812
   *
6813
   * @param string  $str       <p>The input string.</p>
6814
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
6815
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
6816
   *
6817
   * @return string <p>Each character's case swapped.</p>
6818
   */
6819 1
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    // optional: run the input through "UTF8::clean()" before swapping the case
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // A character that is already equal to its upper-case form gets lower-cased,
    // every other character gets upper-cased.
    $flipCase = function ($match) use ($encoding) {
      $char = $match[0];
      $upper = UTF8::strtoupper($char, $encoding);

      return $char === $upper ? UTF8::strtolower($char, $encoding) : $upper;
    };

    // only non-whitespace characters are handed to the callback
    return preg_replace_callback('/[\S]/u', $flipCase, $str);
  }
6853
6854
  /**
6855
   * alias for "UTF8::to_ascii()"
6856
   *
6857
   * @see UTF8::to_ascii()
6858
   *
6859
   * @param string $s
6860
   * @param string $subst_chr
6861
   * @param bool   $strict
6862
   *
6863
   * @return string
6864
   *
6865
   * @deprecated
6866
   */
6867
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    // deprecated camelCase alias: all arguments are forwarded unchanged to "to_ascii()"
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6871
6872
  /**
6873
   * alias for "UTF8::to_iso8859()"
6874
   *
6875
   * @see UTF8::to_iso8859()
6876
   *
6877
   * @param string $str
6878
   *
6879
   * @return string|string[]
6880
   *
6881
   * @deprecated
6882
   */
6883
  public static function toIso8859($str)
  {
    // deprecated camelCase alias: the argument is forwarded unchanged to "to_iso8859()"
    $converted = self::to_iso8859($str);

    return $converted;
  }
6887
6888
  /**
6889
   * alias for "UTF8::to_latin1()"
6890
   *
6891
   * @see UTF8::to_latin1()
6892
   *
6893
   * @param $str
6894
   *
6895
   * @return string
6896
   *
6897
   * @deprecated
6898
   */
6899
  public static function toLatin1($str)
  {
    // deprecated camelCase alias: the argument is forwarded unchanged to "to_latin1()"
    $converted = self::to_latin1($str);

    return $converted;
  }
6903
6904
  /**
6905
   * alias for "UTF8::to_utf8()"
6906
   *
6907
   * @see UTF8::to_utf8()
6908
   *
6909
   * @param string $str
6910
   *
6911
   * @return string
6912
   *
6913
   * @deprecated
6914
   */
6915
  public static function toUTF8($str)
  {
    // deprecated camelCase alias: the argument is forwarded unchanged to "to_utf8()"
    $converted = self::to_utf8($str);

    return $converted;
  }
6919
6920
  /**
6921
   * Convert a string into ASCII.
6922
   *
6923
   * @param string $str     <p>The input string.</p>
6924
   * @param string $unknown [optional] <p>Character use if character unknown. (default is ?)</p>
6925
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
6926
   *                        performance</p>
6927
   *
6928
   * @return string
6929
   */
6930 21
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the lookup tables loaded via "getData()", keyed by "bank" (code point >> 8)
    static $UTF8_TO_ASCII;

    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace('ℌ', 'H', $str);

        // "transliterator_transliterate()" returns false on failure: keep the
        // current string in that case and fall back to the table lookup below
        $transliterated = transliterator_transliterate('NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;', $str);
        if ($transliterated !== false) {
          $str = $transliterated;
        }

        // check again, if we only have ASCII, now ...
        if (self::is_ascii($str) === true) {
          return $str;
        }
      }
    }

    // split the string into single (multi-byte) characters
    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];

    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // A lone continuation byte (128-191) or 254 / 255 can never start a sequence.
      if ($ordC0 < 192 || $ordC0 > 253) {
        $c = $unknown;
        continue;
      }

      // The lead byte determines the sequence length and contributes the top bits.
      // $ord is assigned from scratch for every character, so a code point from a
      // previous iteration can never leak into this one.
      if ($ordC0 <= 223) {
        $sequenceLength = 2;
        $ord = $ordC0 - 192;
      } elseif ($ordC0 <= 239) {
        $sequenceLength = 3;
        $ord = $ordC0 - 224;
      } elseif ($ordC0 <= 247) {
        $sequenceLength = 4;
        $ord = $ordC0 - 240;
      } elseif ($ordC0 <= 251) {
        $sequenceLength = 5;
        $ord = $ordC0 - 248;
      } else {
        $sequenceLength = 6;
        $ord = $ordC0 - 252;
      }

      // every following byte adds 6 more bits
      $isComplete = true;
      for ($k = 1; $k < $sequenceLength; $k++) {
        if (!isset($c[$k])) {
          // truncated sequence: do not read beyond the end of the character
          $isComplete = false;
          break;
        }

        $ord = $ord * 64 + (ord($c[$k]) - 128);
      }

      if ($isComplete === false) {
        $c = $unknown;
        continue;
      }

      // the lookup tables are split into banks of 256 code points, loaded on demand
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        // no replacement known for this code point
        $c = $unknown;
      }
    }
    // break the reference, so that later writes to $c cannot touch the last element
    unset($c);

    return implode('', $chars);
  }
7080
7081
  /**
7082
   * Convert a string into "ISO-8859"-encoding (Latin-1).
7083
   *
7084
   * @param string|string[] $str
7085
   *
7086
   * @return string|string[]
7087
   */
7088 3
  public static function to_iso8859($str)
  {
    // arrays are converted element by element (recursively), the keys are preserved
    if (is_array($str) === true) {
      return array_map(array(__CLASS__, 'to_iso8859'), $str);
    }

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    return self::utf8_decode($str);
  }
7110
7111
  /**
7112
   * alias for "UTF8::to_iso8859()"
7113
   *
7114
   * @see UTF8::to_iso8859()
7115
   *
7116
   * @param string|string[] $str
7117
   *
7118
   * @return string|string[]
7119
   */
7120 1
  public static function to_latin1($str)
  {
    // alias: "Latin-1" is handled by "to_iso8859()", the argument is forwarded unchanged
    $converted = self::to_iso8859($str);

    return $converted;
  }
7124
7125
  /**
7126
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
7127
   *
7128
   * <ul>
7129
   * <li>It decodes UTF-8 codepoints and unicode escape sequences.</li>
7130
   * <li>It assumes that the encoding of the original string is either WINDOWS-1252 or ISO-8859-1.</li>
7131
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you may need to use "UTF8::clean()" for this
7132
   * case.</li>
7133
   * </ul>
7134
   *
7135
   * @param string|string[] $str                    <p>Any string or array.</p>
7136
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
7137
   *
7138
   * @return string|string[] <p>The UTF-8 encoded string.</p>
7139
   */
7140 22
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    // arrays are converted element by element (recursively), the keys are preserved
    if (is_array($str) === true) {
      foreach ($str as $key => $value) {
        $str[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $str;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, also if "strlen()" is overloaded by mbstring
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    $result = '';
    $pos = 0;

    while ($pos < $byteCount) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // "10xxxxxx" is a stray continuation byte and needs conversion,
        // everything else below 0xC0 is copied as it is
        $result .= (($lead & "\xC0") === "\x80") ? self::to_utf8_convert($lead) : $lead;
        $pos++;
        continue;
      }

      // number of continuation bytes this lead byte announces (0 = not an UTF-8 lead byte)
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the announced continuation bytes, bytes behind the end count as "\x00"
      $sequence = $lead;
      $isValid = $tailLength > 0;
      for ($k = 1; $isValid && $k <= $tailLength; $k++) {
        $tail = ($pos + $k >= $byteCount) ? "\x00" : $str[$pos + $k];

        if ($tail >= "\x80" && $tail <= "\xBF") {
          $sequence .= $tail;
        } else {
          $isValid = false;
        }
      }

      if ($isValid) {
        // yeah, almost sure it's UTF8 already
        $result .= $sequence;
        $pos += $tailLength + 1;
      } else {
        // not valid UTF8 - convert only the lead byte, the next bytes are inspected again
        $result .= self::to_utf8_convert($lead);
        $pos++;
      }
    }

    // decode unicode escape sequences
    $result = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $result
    );

    // optional: decode html-entities
    if ($decodeHtmlEntityToUtf8 === true) {
      $result = self::html_entity_decode($result);
    }

    return $result;
  }
7242
7243
  /**
7244
   * @param int $int
7245
   *
7246
   * @return string
7247
   */
7248 16
  private static function to_utf8_convert($int)
  {
    // NOTE: $int is used as a one-byte string here ("ord()" and string bit-operations)
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return '' . self::$WIN1252_TO_UTF8[$byteValue];
    }

    // otherwise build a 2-byte sequence: "110000xx" + "10xxxxxx"
    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $tailByte = ($int & "\x3F") | "\x80";

    return '' . $leadByte . $tailByte;
  }
7263
7264
  /**
7265
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
7266
   *
7267
   * INFO: This is slower than "trim()"
7268
   *
7269
   * We can only use the original-function, if we use <= 7-Bit in the string / chars
7270
   * but the check for ASCII (7-Bit) costs more time than we can save here.
7271
   *
7272
   * @param string $str   <p>The string to be trimmed</p>
7273
   * @param string $chars [optional] <p>Optional characters to be stripped</p>
7274
   *
7275
   * @return string <p>The trimmed string.</p>
7276
   */
7277 26
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // an explicit char list was given: strip it from both ends via ltrim() + rtrim()
    // NOTE(review): a falsy list like '0' ends up in the default branch below - confirm that this is intended
    if ($chars !== INF && $chars) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // default: strip unicode separators (\pZ) and "other" / control characters (\pC)
    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7292
7293
  /**
7294
   * Makes string's first char uppercase.
7295
   *
7296
   * @param string  $str       <p>The input string.</p>
7297
   * @param string  $encoding  [optional] <p>Set the charset.</p>
7298
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
7299
   *
7300
   * @return string <p>The resulting string</p>
7301
   */
7302 14
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // optional: run the input through "UTF8::clean()" first
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything behind the first character stays untouched
    $tail = self::substr($str, 1, null, $encoding);
    if ($tail === false) {
      $tail = '';
    }

    // only the first character is upper-cased
    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $head = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $head . $tail;
  }
7323
7324
  /**
7325
   * alias for "UTF8::ucfirst()"
7326
   *
7327
   * @see UTF8::ucfirst()
7328
   *
7329
   * @param string  $word
7330
   * @param string  $encoding
7331
   * @param boolean $cleanUtf8
7332
   *
7333
   * @return string
7334
   */
7335 1
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // alias: all arguments are forwarded unchanged to "ucfirst()"
    $result = self::ucfirst($word, $encoding, $cleanUtf8);

    return $result;
  }
7339
7340
  /**
7341
   * Uppercase for all words in the string.
7342
   *
7343
   * @param string   $str        <p>The input string.</p>
7344
   * @param string[] $exceptions [optional] <p>Exclusion for some words.</p>
7345
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
7346
   * @param string   $encoding   [optional] <p>Set the charset.</p>
7347
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
7348
   *
7349
   * @return string
7350
   */
7351 8
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // Compare against the empty string instead of relying on truthiness:
    // "0" is a valid input and must not be treated as "empty".
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    // optional: run the input through "UTF8::clean()" first
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the native function can only be used without extra word-chars and without exceptions
    // (strict comparison, so that a charlist / exception of "0" is not ignored)
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty parts only - a part like "0" must stay in the result
      if ((string)$word === '') {
        continue;
      }

      // words from the exception list are kept as they are
      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7408
7409
  /**
7410
   * Multi decode html entity & fix urlencoded-win1252-chars.
7411
   *
7412
   * e.g:
7413
   * 'test+test'                     => 'test test'
7414
   * 'D&#252;sseldorf'               => 'Düsseldorf'
7415
   * 'D%FCsseldorf'                  => 'Düsseldorf'
7416
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
7417
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
7418
   * 'DÃ¼sseldorf'                   => 'Düsseldorf'
7419
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
7420
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
7421
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
7422
   *
7423
   * @param string $str          <p>The input string.</p>
7424
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
7425
   *
7426
   * @return string
7427
   */
7428 1 View Code Duplication
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // non-standard "%uXXXX" escapes are rewritten into html-entities ("&#xXXXX;"),
    // so that they are decoded by the entity-decoding in the loop below
    $pattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($pattern, $str)) {
      $str = preg_replace($pattern, '&#x\\1;', urldecode($str));
    }

    $flags = Bootup::is_php('5.4') === true ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    do {
      $str_compare = $str;

      // "to_utf8()" is documented as string|string[], but "html_entity_decode()" only
      // accepts a string: cast explicitly, like "utf8_decode()" does
      $utf8 = (string)self::to_utf8($str);

      $str = self::fix_simple_utf8(
          urldecode(
              self::html_entity_decode($utf8, $flags)
          )
      );

      // decode again until nothing changes anymore (only if $multi_decode is enabled)
    } while ($multi_decode === true && $str_compare !== $str);

    return (string)$str;
  }
7459
7460
  /**
7461
   * Return an array with "urlencoded"-win1252 -> UTF-8
7462
   *
7463
   * @deprecated use the "UTF8::urldecode()" function to decode a string
7464
   *
7465
   * @return array
7466
   */
7467
  public static function urldecode_fix_win1252_chars()
7468
  {
7469
    return array(
7470
        '%20' => ' ',
7471
        '%21' => '!',
7472
        '%22' => '"',
7473
        '%23' => '#',
7474
        '%24' => '$',
7475
        '%25' => '%',
7476
        '%26' => '&',
7477
        '%27' => "'",
7478
        '%28' => '(',
7479
        '%29' => ')',
7480
        '%2A' => '*',
7481
        '%2B' => '+',
7482
        '%2C' => ',',
7483
        '%2D' => '-',
7484
        '%2E' => '.',
7485
        '%2F' => '/',
7486
        '%30' => '0',
7487
        '%31' => '1',
7488
        '%32' => '2',
7489
        '%33' => '3',
7490
        '%34' => '4',
7491
        '%35' => '5',
7492
        '%36' => '6',
7493
        '%37' => '7',
7494
        '%38' => '8',
7495
        '%39' => '9',
7496
        '%3A' => ':',
7497
        '%3B' => ';',
7498
        '%3C' => '<',
7499
        '%3D' => '=',
7500
        '%3E' => '>',
7501
        '%3F' => '?',
7502
        '%40' => '@',
7503
        '%41' => 'A',
7504
        '%42' => 'B',
7505
        '%43' => 'C',
7506
        '%44' => 'D',
7507
        '%45' => 'E',
7508
        '%46' => 'F',
7509
        '%47' => 'G',
7510
        '%48' => 'H',
7511
        '%49' => 'I',
7512
        '%4A' => 'J',
7513
        '%4B' => 'K',
7514
        '%4C' => 'L',
7515
        '%4D' => 'M',
7516
        '%4E' => 'N',
7517
        '%4F' => 'O',
7518
        '%50' => 'P',
7519
        '%51' => 'Q',
7520
        '%52' => 'R',
7521
        '%53' => 'S',
7522
        '%54' => 'T',
7523
        '%55' => 'U',
7524
        '%56' => 'V',
7525
        '%57' => 'W',
7526
        '%58' => 'X',
7527
        '%59' => 'Y',
7528
        '%5A' => 'Z',
7529
        '%5B' => '[',
7530
        '%5C' => '\\',
7531
        '%5D' => ']',
7532
        '%5E' => '^',
7533
        '%5F' => '_',
7534
        '%60' => '`',
7535
        '%61' => 'a',
7536
        '%62' => 'b',
7537
        '%63' => 'c',
7538
        '%64' => 'd',
7539
        '%65' => 'e',
7540
        '%66' => 'f',
7541
        '%67' => 'g',
7542
        '%68' => 'h',
7543
        '%69' => 'i',
7544
        '%6A' => 'j',
7545
        '%6B' => 'k',
7546
        '%6C' => 'l',
7547
        '%6D' => 'm',
7548
        '%6E' => 'n',
7549
        '%6F' => 'o',
7550
        '%70' => 'p',
7551
        '%71' => 'q',
7552
        '%72' => 'r',
7553
        '%73' => 's',
7554
        '%74' => 't',
7555
        '%75' => 'u',
7556
        '%76' => 'v',
7557
        '%77' => 'w',
7558
        '%78' => 'x',
7559
        '%79' => 'y',
7560
        '%7A' => 'z',
7561
        '%7B' => '{',
7562
        '%7C' => '|',
7563
        '%7D' => '}',
7564
        '%7E' => '~',
7565
        '%7F' => '',
7566
        '%80' => '`',
7567
        '%81' => '',
7568
        '%82' => '‚',
7569
        '%83' => 'ƒ',
7570
        '%84' => '„',
7571
        '%85' => '…',
7572
        '%86' => '†',
7573
        '%87' => '‡',
7574
        '%88' => 'ˆ',
7575
        '%89' => '‰',
7576
        '%8A' => 'Š',
7577
        '%8B' => '‹',
7578
        '%8C' => 'Œ',
7579
        '%8D' => '',
7580
        '%8E' => 'Ž',
7581
        '%8F' => '',
7582
        '%90' => '',
7583
        '%91' => '‘',
7584
        '%92' => '’',
7585
        '%93' => '“',
7586
        '%94' => '”',
7587
        '%95' => '•',
7588
        '%96' => '–',
7589
        '%97' => '—',
7590
        '%98' => '˜',
7591
        '%99' => '™',
7592
        '%9A' => 'š',
7593
        '%9B' => '›',
7594
        '%9C' => 'œ',
7595
        '%9D' => '',
7596
        '%9E' => 'ž',
7597
        '%9F' => 'Ÿ',
7598
        '%A0' => '',
7599
        '%A1' => '¡',
7600
        '%A2' => '¢',
7601
        '%A3' => '£',
7602
        '%A4' => '¤',
7603
        '%A5' => '¥',
7604
        '%A6' => '¦',
7605
        '%A7' => '§',
7606
        '%A8' => '¨',
7607
        '%A9' => '©',
7608
        '%AA' => 'ª',
7609
        '%AB' => '«',
7610
        '%AC' => '¬',
7611
        '%AD' => '',
7612
        '%AE' => '®',
7613
        '%AF' => '¯',
7614
        '%B0' => '°',
7615
        '%B1' => '±',
7616
        '%B2' => '²',
7617
        '%B3' => '³',
7618
        '%B4' => '´',
7619
        '%B5' => 'µ',
7620
        '%B6' => '¶',
7621
        '%B7' => '·',
7622
        '%B8' => '¸',
7623
        '%B9' => '¹',
7624
        '%BA' => 'º',
7625
        '%BB' => '»',
7626
        '%BC' => '¼',
7627
        '%BD' => '½',
7628
        '%BE' => '¾',
7629
        '%BF' => '¿',
7630
        '%C0' => 'À',
7631
        '%C1' => 'Á',
7632
        '%C2' => 'Â',
7633
        '%C3' => 'Ã',
7634
        '%C4' => 'Ä',
7635
        '%C5' => 'Å',
7636
        '%C6' => 'Æ',
7637
        '%C7' => 'Ç',
7638
        '%C8' => 'È',
7639
        '%C9' => 'É',
7640
        '%CA' => 'Ê',
7641
        '%CB' => 'Ë',
7642
        '%CC' => 'Ì',
7643
        '%CD' => 'Í',
7644
        '%CE' => 'Î',
7645
        '%CF' => 'Ï',
7646
        '%D0' => 'Ð',
7647
        '%D1' => 'Ñ',
7648
        '%D2' => 'Ò',
7649
        '%D3' => 'Ó',
7650
        '%D4' => 'Ô',
7651
        '%D5' => 'Õ',
7652
        '%D6' => 'Ö',
7653
        '%D7' => '×',
7654
        '%D8' => 'Ø',
7655
        '%D9' => 'Ù',
7656
        '%DA' => 'Ú',
7657
        '%DB' => 'Û',
7658
        '%DC' => 'Ü',
7659
        '%DD' => 'Ý',
7660
        '%DE' => 'Þ',
7661
        '%DF' => 'ß',
7662
        '%E0' => 'à',
7663
        '%E1' => 'á',
7664
        '%E2' => 'â',
7665
        '%E3' => 'ã',
7666
        '%E4' => 'ä',
7667
        '%E5' => 'å',
7668
        '%E6' => 'æ',
7669
        '%E7' => 'ç',
7670
        '%E8' => 'è',
7671
        '%E9' => 'é',
7672
        '%EA' => 'ê',
7673
        '%EB' => 'ë',
7674
        '%EC' => 'ì',
7675
        '%ED' => 'í',
7676
        '%EE' => 'î',
7677
        '%EF' => 'ï',
7678
        '%F0' => 'ð',
7679
        '%F1' => 'ñ',
7680
        '%F2' => 'ò',
7681
        '%F3' => 'ó',
7682
        '%F4' => 'ô',
7683
        '%F5' => 'õ',
7684
        '%F6' => 'ö',
7685
        '%F7' => '÷',
7686
        '%F8' => 'ø',
7687
        '%F9' => 'ù',
7688
        '%FA' => 'ú',
7689
        '%FB' => 'û',
7690
        '%FC' => 'ü',
7691
        '%FD' => 'ý',
7692
        '%FE' => 'þ',
7693
        '%FF' => 'ÿ',
7694
    );
7695
  }
7696
7697
  /**
7698
   * Decodes an UTF-8 string to ISO-8859-1.
7699
   *
7700
   * @param string $str <p>The input string.</p>
7701
   *
7702
   * @return string
7703
   */
7704 6
  public static function utf8_decode($str)
  {
    // init
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the search / replace lists are built only once from the UTF8_TO_WIN1252 table
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      $searchCache = array_keys(self::$UTF8_TO_WIN1252);
      $replaceCache = array_values(self::$UTF8_TO_WIN1252);
    }

    $str = str_replace($searchCache, $replaceCache, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the length in bytes, also if "strlen()" is overloaded by mbstring
    $len = self::$SUPPORT['mbstring_func_overload'] === true ? \mb_strlen($str, '8BIT') : strlen($str);

    // The string is rewritten in place: $i is the read position, $j the write position
    // ($j never overtakes $i, because every sequence shrinks to a single byte).
    $i = 0;
    $j = 0;
    while ($i < $len) {
      $highNibble = $str[$i] & "\xF0";

      if ($highNibble === "\xC0" || $highNibble === "\xD0") {
        // 2-byte sequence: rebuild the code point, only values below 256 can be kept
        $leadBits = ord($str[$i] & "\x1F");
        ++$i;
        $tailBits = ord($str[$i] & "\x3F");

        $c = ($leadBits << 6) | $tailBits;
        $str[$j] = $c < 256 ? self::chr_and_parse_int($c) : '?';
      } elseif ($highNibble === "\xE0" || $highNibble === "\xF0") {
        // 3-byte and 4-byte sequences are replaced by "?",
        // the 4-byte case skips one continuation byte more than the 3-byte case
        $str[$j] = '?';
        $i += ($highNibble === "\xF0") ? 3 : 2;
      } else {
        // every other byte is copied as it is
        $str[$j] = $str[$i];
      }

      ++$i;
      ++$j;
    }

    // cut off the leftover bytes behind the write position
    return (string)self::substr($str, 0, $j, '8BIT');
  }
7760
7761
  /**
   * Encodes an ISO-8859-1 string to UTF-8.
   *
   * <p>
   * The conversion itself is done by the native "utf8_encode()" function. If the result contains
   * the byte 0xC2, the keys of "self::$CP1252_TO_UTF8" are additionally replaced by their values.
   * </p>
   *
   * <p>NOTE: the native "utf8_encode()" function is deprecated as of PHP 8.2.</p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string
   */
  public static function utf8_encode($str)
  {
    $str = (string)$str;

    // nothing to do for an empty string
    if (!isset($str[0])) {
      return '';
    }

    $encoded = \utf8_encode($str);
    if ($encoded === false) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a 0xC2 byte there is nothing the replacement table could match
    // (presumably every key of the table starts with that byte; verify against the table)
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // split the table into search / replace lists only once per request
    static $searchCache = null;
    static $replaceCache = null;

    if ($searchCache === null) {
      $searchCache = array_keys(self::$CP1252_TO_UTF8);
      $replaceCache = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($searchCache, $replaceCache, $encoded);
  }
7797
7798
  /**
   * Fix -> utf8-win1252 chars.
   *
   * <p>Kept for backward compatibility only: the call is forwarded unchanged to "UTF8::fix_simple_utf8()".</p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The result of "UTF8::fix_simple_utf8()".</p>
   *
   * @deprecated use "UTF8::fix_simple_utf8()"
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7811
7812
  /**
   * Returns the class-internal table of utf8 whitespace characters.
   *
   * @see   : http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author: Derek E.
   *
   * @return array <p>
   *               An array with all known whitespace characters as values and the type of whitespace as keys,
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7828
7829
  /**
   * Limit the number of words in a string.
   *
   * <p>
   * Returns an empty string for an empty input or a limit below 1. If the string has
   * no more than $limit words it is returned unchanged; otherwise it is cut after the
   * last allowed word, right-trimmed and $strAddOn is appended.
   * </p>
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The limit of words as integer.</p>
   * @param string $strAddOn <p>Replacement for the stripped part of the string.</p>
   *
   * @return string
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $maxWords = (int)$limit;

    if ($maxWords < 1) {
      return '';
    }

    // leading whitespace, followed by 1..$maxWords runs of "non-whitespace + trailing whitespace"
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $maxWords . '}/u';
    preg_match($pattern, $str, $found);

    // the match is anchored at the start, so an equal length means nothing was cut off
    if (
        isset($found[0])
        &&
        self::strlen($str) !== self::strlen($found[0])
    ) {
      return self::rtrim($found[0]) . $strAddOn;
    }

    return $str;
  }
7865
7866
  /**
   * Wraps a string to a given number of characters.
   *
   * <p>
   * The string is split into characters via "UTF8::split()" and mirrored into an ASCII-only
   * mask (one mask byte per character). The native "wordwrap()" then runs on that mask,
   * and the break positions it produces are applied to the real characters.
   * </p>
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The line is broken using the optional break parameter.</p>
   * @param bool   $cut   [optional] <p>
   *                      If the cut is set to true, the string is
   *                      always wrapped at or before the specified width. So if you have
   *                      a word that is larger than the given width, it is broken apart.
   *                      </p>
   *
   * @return string <p>The given string wrapped at the specified column.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if (!isset($str[0], $break[0])) {
      return '';
    }

    // $chars: the real characters (an existing break counts as one element)
    // $mask : "#" for an existing break, " " for a space, "?" for any other character
    $chars = array();
    $mask = '';

    foreach (explode($break, $str) as $segmentIndex => $segment) {

      // re-insert the break that "explode()" removed between two segments
      if ($segmentIndex > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    // let the native function decide where the lines are broken
    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;
    $maskIndex = -1;
    $breakPos = -1;

    while (true) {
      $breakPos = self::strpos($mask, '#', $breakPos + 1);
      if ($breakPos === false) {
        break;
      }

      // copy (and release) all characters in front of the break position
      ++$maskIndex;
      while ($maskIndex < $breakPos) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
        ++$maskIndex;
      }

      // a break / space at this position is replaced by the break; if the "#" was
      // inserted into a long word ($cut), the character at this position is kept
      $current = $chars[$charIndex];
      if ($current === $break || $current === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7933
7934
  /**
   * Returns the class-internal list of Unicode White Space characters.
   *
   * @return array <p>An array with numeric code point as key and White Space Character as value.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7943
7944
}
7945