Completed
Push — master ( bbaf02...7256af )
by Lars
02:57
created

UTF8::ord()   D

Complexity

Conditions 14
Paths 114

Size

Total Lines 48
Code Lines 24

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 13
CRAP Score 14

Importance

Changes 0
Metric Value
dl 0
loc 48
ccs 13
cts 13
cp 1
rs 4.829
c 0
b 0
f 0
cc 14
eloc 24
nc 114
nop 2
crap 14

How to fix   Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include Extract Method.

1
<?php
2
3
namespace voku\helper;
4
5
/**
6
 * UTF8-Helper-Class
7
 *
8
 * @package voku\helper
9
 */
10
final class UTF8
11
{
12
  // (CRLF|([ZWNJ-ZWJ]|T+|L*(LV?V+|LV|LVT)T*|L+|[^Control])[Extend]*|[Control])
13
  // This regular expression is a work around for http://bugs.exim.org/1279
14
  const GRAPHEME_CLUSTER_RX = '(?:\r\n|(?:[ -~\x{200C}\x{200D}]|[ᆨ-ᇹ]+|[ᄀ-ᅟ]*(?:[가개갸걔거게겨계고과괘괴교구궈궤귀규그긔기까깨꺄꺠꺼께껴꼐꼬꽈꽤꾀꾜꾸꿔꿰뀌뀨끄끠끼나내냐냬너네녀녜노놔놰뇌뇨누눠눼뉘뉴느늬니다대댜댸더데뎌뎨도돠돼되됴두둬뒈뒤듀드듸디따때땨떄떠떼뗘뗴또똬뙈뙤뚀뚜뚸뛔뛰뜌뜨띄띠라래랴럐러레려례로롸뢔뢰료루뤄뤠뤼류르릐리마매먀먜머메며몌모뫄뫠뫼묘무뭐뭬뮈뮤므믜미바배뱌뱨버베벼볘보봐봬뵈뵤부붜붸뷔뷰브븨비빠빼뺘뺴뻐뻬뼈뼤뽀뽜뽸뾔뾰뿌뿨쀄쀠쀼쁘쁴삐사새샤섀서세셔셰소솨쇄쇠쇼수숴쉐쉬슈스싀시싸쌔쌰썌써쎄쎠쎼쏘쏴쐐쐬쑈쑤쒀쒜쒸쓔쓰씌씨아애야얘어에여예오와왜외요우워웨위유으의이자재쟈쟤저제져졔조좌좨죄죠주줘줴쥐쥬즈즤지짜째쨔쨰쩌쩨쪄쪠쪼쫘쫴쬐쬬쭈쭤쮀쮜쮸쯔쯰찌차채챠챼처체쳐쳬초촤쵀최쵸추춰췌취츄츠츼치카캐캬컈커케켜켸코콰쾌쾨쿄쿠쿼퀘퀴큐크킈키타태탸턔터테텨톄토톼퇘퇴툐투퉈퉤튀튜트틔티파패퍄퍠퍼페펴폐포퐈퐤푀표푸풔풰퓌퓨프픠피하해햐햬허헤혀혜호화홰회효후훠훼휘휴흐희히]?[ᅠ-ᆢ]+|[가-힣])[ᆨ-ᇹ]*|[ᄀ-ᅟ]+|[^\p{Cc}\p{Cf}\p{Zl}\p{Zp}])[\p{Mn}\p{Me}\x{09BE}\x{09D7}\x{0B3E}\x{0B57}\x{0BBE}\x{0BD7}\x{0CC2}\x{0CD5}\x{0CD6}\x{0D3E}\x{0D57}\x{0DCF}\x{0DDF}\x{200C}\x{200D}\x{1D165}\x{1D16E}-\x{1D172}]*|[\p{Cc}\p{Cf}\p{Zl}\p{Zp}])';
15
16
  /**
17
   * @var array
18
   */
19
  private static $WIN1252_TO_UTF8 = array(
20
      128 => "\xe2\x82\xac", // EURO SIGN
21
      130 => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
22
      131 => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
23
      132 => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
24
      133 => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
25
      134 => "\xe2\x80\xa0", // DAGGER
26
      135 => "\xe2\x80\xa1", // DOUBLE DAGGER
27
      136 => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
28
      137 => "\xe2\x80\xb0", // PER MILLE SIGN
29
      138 => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
30
      139 => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
31
      140 => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
32
      142 => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
33
      145 => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
34
      146 => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
35
      147 => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
36
      148 => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
37
      149 => "\xe2\x80\xa2", // BULLET
38
      150 => "\xe2\x80\x93", // EN DASH
39
      151 => "\xe2\x80\x94", // EM DASH
40
      152 => "\xcb\x9c", // SMALL TILDE
41
      153 => "\xe2\x84\xa2", // TRADE MARK SIGN
42
      154 => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
43
      155 => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
44
      156 => "\xc5\x93", // LATIN SMALL LIGATURE OE
45
      158 => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
46
      159 => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
47
      164 => "\xc3\xb1", // ñ
48
      165 => "\xc3\x91", // Ñ
49
  );
50
51
  /**
52
   * @var array
53
   */
54
  private static $CP1252_TO_UTF8 = array(
55
      '€' => '€',
56
      '‚' => '‚',
57
      'ƒ' => 'ƒ',
58
      '„' => '„',
59
      '…' => '…',
60
      '†' => '†',
61
      '‡' => '‡',
62
      'ˆ' => 'ˆ',
63
      '‰' => '‰',
64
      'Š' => 'Š',
65
      '‹' => '‹',
66
      'Œ' => 'Œ',
67
      'Ž' => 'Ž',
68
      '‘' => '‘',
69
      '’' => '’',
70
      '“' => '“',
71
      '”' => '”',
72
      '•' => '•',
73
      '–' => '–',
74
      '—' => '—',
75
      '˜' => '˜',
76
      '™' => '™',
77
      'š' => 'š',
78
      '›' => '›',
79
      'œ' => 'œ',
80
      'ž' => 'ž',
81
      'Ÿ' => 'Ÿ',
82
  );
83
84
  /**
   * Byte order mark (as raw bytes) => length of that mark in bytes.
   *
   * Every BOM is listed twice: once as the real byte sequence and once as
   * the sequence you get when those bytes were wrongly read as
   * "WINDOWS-1252" and then stored as UTF-8 (one char may then need more
   * than one byte, which is why those variants are longer).
   *
   * All keys are written as byte escapes so that the table does not depend
   * on the encoding this source file is saved in; the byte length of each
   * key equals its value.
   *
   * INFO: https://en.wikipedia.org/wiki/Byte_order_mark
   *
   * @var array
   */
  private static $BOM = array(
      "\xef\xbb\xbf"             => 3, // UTF-8 BOM
      "\xc3\xaf\xc2\xbb\xc2\xbf" => 6, // UTF-8 BOM as "WINDOWS-1252" (one char has [maybe] more than one byte ...)
      "\x00\x00\xfe\xff"         => 4, // UTF-32 (BE) BOM
      "\x00\x00\xc3\xbe\xc3\xbf" => 6, // UTF-32 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe\x00\x00"         => 4, // UTF-32 (LE) BOM
      "\xc3\xbf\xc3\xbe\x00\x00" => 6, // UTF-32 (LE) BOM as "WINDOWS-1252"
      "\xfe\xff"                 => 2, // UTF-16 (BE) BOM
      "\xc3\xbe\xc3\xbf"         => 4, // UTF-16 (BE) BOM as "WINDOWS-1252"
      "\xff\xfe"                 => 2, // UTF-16 (LE) BOM
      "\xc3\xbf\xc3\xbe"         => 4, // UTF-16 (LE) BOM as "WINDOWS-1252"
  );
103
104
  /**
105
   * Numeric code point => UTF-8 Character
106
   *
107
   * url: http://www.w3schools.com/charsets/ref_utf_punctuation.asp
108
   *
109
   * @var array
110
   */
111
  private static $WHITESPACE = array(
112
    // NUL Byte
113
    0     => "\x0",
114
    // Tab
115
    9     => "\x9",
116
    // New Line
117
    10    => "\xa",
118
    // Vertical Tab
119
    11    => "\xb",
120
    // Carriage Return
121
    13    => "\xd",
122
    // Ordinary Space
123
    32    => "\x20",
124
    // NO-BREAK SPACE
125
    160   => "\xc2\xa0",
126
    // OGHAM SPACE MARK
127
    5760  => "\xe1\x9a\x80",
128
    // MONGOLIAN VOWEL SEPARATOR
129
    6158  => "\xe1\xa0\x8e",
130
    // EN QUAD
131
    8192  => "\xe2\x80\x80",
132
    // EM QUAD
133
    8193  => "\xe2\x80\x81",
134
    // EN SPACE
135
    8194  => "\xe2\x80\x82",
136
    // EM SPACE
137
    8195  => "\xe2\x80\x83",
138
    // THREE-PER-EM SPACE
139
    8196  => "\xe2\x80\x84",
140
    // FOUR-PER-EM SPACE
141
    8197  => "\xe2\x80\x85",
142
    // SIX-PER-EM SPACE
143
    8198  => "\xe2\x80\x86",
144
    // FIGURE SPACE
145
    8199  => "\xe2\x80\x87",
146
    // PUNCTUATION SPACE
147
    8200  => "\xe2\x80\x88",
148
    // THIN SPACE
149
    8201  => "\xe2\x80\x89",
150
    //HAIR SPACE
151
    8202  => "\xe2\x80\x8a",
152
    // LINE SEPARATOR
153
    8232  => "\xe2\x80\xa8",
154
    // PARAGRAPH SEPARATOR
155
    8233  => "\xe2\x80\xa9",
156
    // NARROW NO-BREAK SPACE
157
    8239  => "\xe2\x80\xaf",
158
    // MEDIUM MATHEMATICAL SPACE
159
    8287  => "\xe2\x81\x9f",
160
    // IDEOGRAPHIC SPACE
161
    12288 => "\xe3\x80\x80",
162
  );
163
164
  /**
165
   * @var array
166
   */
167
  private static $WHITESPACE_TABLE = array(
168
      'SPACE'                     => "\x20",
169
      'NO-BREAK SPACE'            => "\xc2\xa0",
170
      'OGHAM SPACE MARK'          => "\xe1\x9a\x80",
171
      'EN QUAD'                   => "\xe2\x80\x80",
172
      'EM QUAD'                   => "\xe2\x80\x81",
173
      'EN SPACE'                  => "\xe2\x80\x82",
174
      'EM SPACE'                  => "\xe2\x80\x83",
175
      'THREE-PER-EM SPACE'        => "\xe2\x80\x84",
176
      'FOUR-PER-EM SPACE'         => "\xe2\x80\x85",
177
      'SIX-PER-EM SPACE'          => "\xe2\x80\x86",
178
      'FIGURE SPACE'              => "\xe2\x80\x87",
179
      'PUNCTUATION SPACE'         => "\xe2\x80\x88",
180
      'THIN SPACE'                => "\xe2\x80\x89",
181
      'HAIR SPACE'                => "\xe2\x80\x8a",
182
      'LINE SEPARATOR'            => "\xe2\x80\xa8",
183
      'PARAGRAPH SEPARATOR'       => "\xe2\x80\xa9",
184
      'ZERO WIDTH SPACE'          => "\xe2\x80\x8b",
185
      'NARROW NO-BREAK SPACE'     => "\xe2\x80\xaf",
186
      'MEDIUM MATHEMATICAL SPACE' => "\xe2\x81\x9f",
187
      'IDEOGRAPHIC SPACE'         => "\xe3\x80\x80",
188
  );
189
190
  /**
191
   * bidirectional text chars
192
   *
193
   * url: https://www.w3.org/International/questions/qa-bidi-unicode-controls
194
   *
195
   * @var array
196
   */
197
  private static $BIDI_UNI_CODE_CONTROLS_TABLE = array(
198
    // LEFT-TO-RIGHT EMBEDDING (use -> dir = "ltr")
199
    8234 => "\xE2\x80\xAA",
200
    // RIGHT-TO-LEFT EMBEDDING (use -> dir = "rtl")
201
    8235 => "\xE2\x80\xAB",
202
    // POP DIRECTIONAL FORMATTING // (use -> </bdo>)
203
    8236 => "\xE2\x80\xAC",
204
    // LEFT-TO-RIGHT OVERRIDE // (use -> <bdo dir = "ltr">)
205
    8237 => "\xE2\x80\xAD",
206
    // RIGHT-TO-LEFT OVERRIDE // (use -> <bdo dir = "rtl">)
207
    8238 => "\xE2\x80\xAE",
208
    // LEFT-TO-RIGHT ISOLATE // (use -> dir = "ltr")
209
    8294 => "\xE2\x81\xA6",
210
    // RIGHT-TO-LEFT ISOLATE // (use -> dir = "rtl")
211
    8295 => "\xE2\x81\xA7",
212
    // FIRST STRONG ISOLATE // (use -> dir = "auto")
213
    8296 => "\xE2\x81\xA8",
214
    // POP DIRECTIONAL ISOLATE
215
    8297 => "\xE2\x81\xA9",
216
  );
217
218
  /**
219
   * @var array
220
   */
221
  private static $COMMON_CASE_FOLD = array(
222
      'ſ'            => 's',
223
      "\xCD\x85"     => 'ι',
224
      'ς'            => 'σ',
225
      "\xCF\x90"     => 'β',
226
      "\xCF\x91"     => 'θ',
227
      "\xCF\x95"     => 'φ',
228
      "\xCF\x96"     => 'π',
229
      "\xCF\xB0"     => 'κ',
230
      "\xCF\xB1"     => 'ρ',
231
      "\xCF\xB5"     => 'ε',
232
      "\xE1\xBA\x9B" => "\xE1\xB9\xA1",
233
      "\xE1\xBE\xBE" => 'ι',
234
  );
235
236
  /**
237
   * @var array
238
   */
239
  private static $BROKEN_UTF8_FIX = array(
240
      "\xc2\x80" => "\xe2\x82\xac", // EURO SIGN
241
      "\xc2\x82" => "\xe2\x80\x9a", // SINGLE LOW-9 QUOTATION MARK
242
      "\xc2\x83" => "\xc6\x92", // LATIN SMALL LETTER F WITH HOOK
243
      "\xc2\x84" => "\xe2\x80\x9e", // DOUBLE LOW-9 QUOTATION MARK
244
      "\xc2\x85" => "\xe2\x80\xa6", // HORIZONTAL ELLIPSIS
245
      "\xc2\x86" => "\xe2\x80\xa0", // DAGGER
246
      "\xc2\x87" => "\xe2\x80\xa1", // DOUBLE DAGGER
247
      "\xc2\x88" => "\xcb\x86", // MODIFIER LETTER CIRCUMFLEX ACCENT
248
      "\xc2\x89" => "\xe2\x80\xb0", // PER MILLE SIGN
249
      "\xc2\x8a" => "\xc5\xa0", // LATIN CAPITAL LETTER S WITH CARON
250
      "\xc2\x8b" => "\xe2\x80\xb9", // SINGLE LEFT-POINTING ANGLE QUOTE
251
      "\xc2\x8c" => "\xc5\x92", // LATIN CAPITAL LIGATURE OE
252
      "\xc2\x8e" => "\xc5\xbd", // LATIN CAPITAL LETTER Z WITH CARON
253
      "\xc2\x91" => "\xe2\x80\x98", // LEFT SINGLE QUOTATION MARK
254
      "\xc2\x92" => "\xe2\x80\x99", // RIGHT SINGLE QUOTATION MARK
255
      "\xc2\x93" => "\xe2\x80\x9c", // LEFT DOUBLE QUOTATION MARK
256
      "\xc2\x94" => "\xe2\x80\x9d", // RIGHT DOUBLE QUOTATION MARK
257
      "\xc2\x95" => "\xe2\x80\xa2", // BULLET
258
      "\xc2\x96" => "\xe2\x80\x93", // EN DASH
259
      "\xc2\x97" => "\xe2\x80\x94", // EM DASH
260
      "\xc2\x98" => "\xcb\x9c", // SMALL TILDE
261
      "\xc2\x99" => "\xe2\x84\xa2", // TRADE MARK SIGN
262
      "\xc2\x9a" => "\xc5\xa1", // LATIN SMALL LETTER S WITH CARON
263
      "\xc2\x9b" => "\xe2\x80\xba", // SINGLE RIGHT-POINTING ANGLE QUOTE
264
      "\xc2\x9c" => "\xc5\x93", // LATIN SMALL LIGATURE OE
265
      "\xc2\x9e" => "\xc5\xbe", // LATIN SMALL LETTER Z WITH CARON
266
      "\xc2\x9f" => "\xc5\xb8", // LATIN CAPITAL LETTER Y WITH DIAERESIS
267
      'ü'       => 'ü',
268
      'ä'       => 'ä',
269
      'ö'       => 'ö',
270
      'Ö'       => 'Ö',
271
      'ß'       => 'ß',
272
      'Ã '       => 'à',
273
      'á'       => 'á',
274
      'â'       => 'â',
275
      'ã'       => 'ã',
276
      'ù'       => 'ù',
277
      'ú'       => 'ú',
278
      'û'       => 'û',
279
      'Ù'       => 'Ù',
280
      'Ú'       => 'Ú',
281
      'Û'       => 'Û',
282
      'Ü'       => 'Ü',
283
      'ò'       => 'ò',
284
      'ó'       => 'ó',
285
      'ô'       => 'ô',
286
      'è'       => 'è',
287
      'é'       => 'é',
288
      'ê'       => 'ê',
289
      'ë'       => 'ë',
290
      'À'       => 'À',
291
      'Á'       => 'Á',
292
      'Â'       => 'Â',
293
      'Ã'       => 'Ã',
294
      'Ä'       => 'Ä',
295
      'Ã…'       => 'Å',
296
      'Ç'       => 'Ç',
297
      'È'       => 'È',
298
      'É'       => 'É',
299
      'Ê'       => 'Ê',
300
      'Ë'       => 'Ë',
301
      'ÃŒ'       => 'Ì',
302
      'Í'       => 'Í',
303
      'ÃŽ'       => 'Î',
304
      'Ï'       => 'Ï',
305
      'Ñ'       => 'Ñ',
306
      'Ã’'       => 'Ò',
307
      'Ó'       => 'Ó',
308
      'Ô'       => 'Ô',
309
      'Õ'       => 'Õ',
310
      'Ø'       => 'Ø',
311
      'Ã¥'       => 'å',
312
      'æ'       => 'æ',
313
      'ç'       => 'ç',
314
      'ì'       => 'ì',
315
      'í'       => 'í',
316
      'î'       => 'î',
317
      'ï'       => 'ï',
318
      'ð'       => 'ð',
319
      'ñ'       => 'ñ',
320
      'õ'       => 'õ',
321
      'ø'       => 'ø',
322
      'ý'       => 'ý',
323
      'ÿ'       => 'ÿ',
324
      '€'      => '€',
325
      '’'      => '’',
326
  );
327
328
  /**
329
   * @var array
330
   */
331
  private static $UTF8_TO_WIN1252 = array(
332
      "\xe2\x82\xac" => "\x80", // EURO SIGN
333
      "\xe2\x80\x9a" => "\x82", // SINGLE LOW-9 QUOTATION MARK
334
      "\xc6\x92"     => "\x83", // LATIN SMALL LETTER F WITH HOOK
335
      "\xe2\x80\x9e" => "\x84", // DOUBLE LOW-9 QUOTATION MARK
336
      "\xe2\x80\xa6" => "\x85", // HORIZONTAL ELLIPSIS
337
      "\xe2\x80\xa0" => "\x86", // DAGGER
338
      "\xe2\x80\xa1" => "\x87", // DOUBLE DAGGER
339
      "\xcb\x86"     => "\x88", // MODIFIER LETTER CIRCUMFLEX ACCENT
340
      "\xe2\x80\xb0" => "\x89", // PER MILLE SIGN
341
      "\xc5\xa0"     => "\x8a", // LATIN CAPITAL LETTER S WITH CARON
342
      "\xe2\x80\xb9" => "\x8b", // SINGLE LEFT-POINTING ANGLE QUOTE
343
      "\xc5\x92"     => "\x8c", // LATIN CAPITAL LIGATURE OE
344
      "\xc5\xbd"     => "\x8e", // LATIN CAPITAL LETTER Z WITH CARON
345
      "\xe2\x80\x98" => "\x91", // LEFT SINGLE QUOTATION MARK
346
      "\xe2\x80\x99" => "\x92", // RIGHT SINGLE QUOTATION MARK
347
      "\xe2\x80\x9c" => "\x93", // LEFT DOUBLE QUOTATION MARK
348
      "\xe2\x80\x9d" => "\x94", // RIGHT DOUBLE QUOTATION MARK
349
      "\xe2\x80\xa2" => "\x95", // BULLET
350
      "\xe2\x80\x93" => "\x96", // EN DASH
351
      "\xe2\x80\x94" => "\x97", // EM DASH
352
      "\xcb\x9c"     => "\x98", // SMALL TILDE
353
      "\xe2\x84\xa2" => "\x99", // TRADE MARK SIGN
354
      "\xc5\xa1"     => "\x9a", // LATIN SMALL LETTER S WITH CARON
355
      "\xe2\x80\xba" => "\x9b", // SINGLE RIGHT-POINTING ANGLE QUOTE
356
      "\xc5\x93"     => "\x9c", // LATIN SMALL LIGATURE OE
357
      "\xc5\xbe"     => "\x9e", // LATIN SMALL LETTER Z WITH CARON
358
      "\xc5\xb8"     => "\x9f", // LATIN CAPITAL LETTER Y WITH DIAERESIS
359
  );
360
361
  /**
362
   * @var array
363
   */
364
  private static $UTF8_MSWORD = array(
365
      "\xc2\xab"     => '"', // « (U+00AB) in UTF-8
366
      "\xc2\xbb"     => '"', // » (U+00BB) in UTF-8
367
      "\xe2\x80\x98" => "'", // ‘ (U+2018) in UTF-8
368
      "\xe2\x80\x99" => "'", // ’ (U+2019) in UTF-8
369
      "\xe2\x80\x9a" => "'", // ‚ (U+201A) in UTF-8
370
      "\xe2\x80\x9b" => "'", // ‛ (U+201B) in UTF-8
371
      "\xe2\x80\x9c" => '"', // “ (U+201C) in UTF-8
372
      "\xe2\x80\x9d" => '"', // ” (U+201D) in UTF-8
373
      "\xe2\x80\x9e" => '"', // „ (U+201E) in UTF-8
374
      "\xe2\x80\x9f" => '"', // ‟ (U+201F) in UTF-8
375
      "\xe2\x80\xb9" => "'", // ‹ (U+2039) in UTF-8
376
      "\xe2\x80\xba" => "'", // › (U+203A) in UTF-8
377
      "\xe2\x80\x93" => '-', // – (U+2013) in UTF-8
378
      "\xe2\x80\x94" => '-', // — (U+2014) in UTF-8
379
      "\xe2\x80\xa6" => '...' // … (U+2026) in UTF-8
380
  );
381
382
  /**
383
   * @var array
384
   */
385
  private static $ICONV_ENCODING = array(
386
      'ANSI_X3.4-1968',
387
      'ANSI_X3.4-1986',
388
      'ASCII',
389
      'CP367',
390
      'IBM367',
391
      'ISO-IR-6',
392
      'ISO646-US',
393
      'ISO_646.IRV:1991',
394
      'US',
395
      'US-ASCII',
396
      'CSASCII',
397
      'UTF-8',
398
      'ISO-10646-UCS-2',
399
      'UCS-2',
400
      'CSUNICODE',
401
      'UCS-2BE',
402
      'UNICODE-1-1',
403
      'UNICODEBIG',
404
      'CSUNICODE11',
405
      'UCS-2LE',
406
      'UNICODELITTLE',
407
      'ISO-10646-UCS-4',
408
      'UCS-4',
409
      'CSUCS4',
410
      'UCS-4BE',
411
      'UCS-4LE',
412
      'UTF-16',
413
      'UTF-16BE',
414
      'UTF-16LE',
415
      'UTF-32',
416
      'UTF-32BE',
417
      'UTF-32LE',
418
      'UNICODE-1-1-UTF-7',
419
      'UTF-7',
420
      'CSUNICODE11UTF7',
421
      'UCS-2-INTERNAL',
422
      'UCS-2-SWAPPED',
423
      'UCS-4-INTERNAL',
424
      'UCS-4-SWAPPED',
425
      'C99',
426
      'JAVA',
427
      'CP819',
428
      'IBM819',
429
      'ISO-8859-1',
430
      'ISO-IR-100',
431
      'ISO8859-1',
432
      'ISO_8859-1',
433
      'ISO_8859-1:1987',
434
      'L1',
435
      'LATIN1',
436
      'CSISOLATIN1',
437
      'ISO-8859-2',
438
      'ISO-IR-101',
439
      'ISO8859-2',
440
      'ISO_8859-2',
441
      'ISO_8859-2:1987',
442
      'L2',
443
      'LATIN2',
444
      'CSISOLATIN2',
445
      'ISO-8859-3',
446
      'ISO-IR-109',
447
      'ISO8859-3',
448
      'ISO_8859-3',
449
      'ISO_8859-3:1988',
450
      'L3',
451
      'LATIN3',
452
      'CSISOLATIN3',
453
      'ISO-8859-4',
454
      'ISO-IR-110',
455
      'ISO8859-4',
456
      'ISO_8859-4',
457
      'ISO_8859-4:1988',
458
      'L4',
459
      'LATIN4',
460
      'CSISOLATIN4',
461
      'CYRILLIC',
462
      'ISO-8859-5',
463
      'ISO-IR-144',
464
      'ISO8859-5',
465
      'ISO_8859-5',
466
      'ISO_8859-5:1988',
467
      'CSISOLATINCYRILLIC',
468
      'ARABIC',
469
      'ASMO-708',
470
      'ECMA-114',
471
      'ISO-8859-6',
472
      'ISO-IR-127',
473
      'ISO8859-6',
474
      'ISO_8859-6',
475
      'ISO_8859-6:1987',
476
      'CSISOLATINARABIC',
477
      'ECMA-118',
478
      'ELOT_928',
479
      'GREEK',
480
      'GREEK8',
481
      'ISO-8859-7',
482
      'ISO-IR-126',
483
      'ISO8859-7',
484
      'ISO_8859-7',
485
      'ISO_8859-7:1987',
486
      'ISO_8859-7:2003',
487
      'CSISOLATINGREEK',
488
      'HEBREW',
489
      'ISO-8859-8',
490
      'ISO-IR-138',
491
      'ISO8859-8',
492
      'ISO_8859-8',
493
      'ISO_8859-8:1988',
494
      'CSISOLATINHEBREW',
495
      'ISO-8859-9',
496
      'ISO-IR-148',
497
      'ISO8859-9',
498
      'ISO_8859-9',
499
      'ISO_8859-9:1989',
500
      'L5',
501
      'LATIN5',
502
      'CSISOLATIN5',
503
      'ISO-8859-10',
504
      'ISO-IR-157',
505
      'ISO8859-10',
506
      'ISO_8859-10',
507
      'ISO_8859-10:1992',
508
      'L6',
509
      'LATIN6',
510
      'CSISOLATIN6',
511
      'ISO-8859-11',
512
      'ISO8859-11',
513
      'ISO_8859-11',
514
      'ISO-8859-13',
515
      'ISO-IR-179',
516
      'ISO8859-13',
517
      'ISO_8859-13',
518
      'L7',
519
      'LATIN7',
520
      'ISO-8859-14',
521
      'ISO-CELTIC',
522
      'ISO-IR-199',
523
      'ISO8859-14',
524
      'ISO_8859-14',
525
      'ISO_8859-14:1998',
526
      'L8',
527
      'LATIN8',
528
      'ISO-8859-15',
529
      'ISO-IR-203',
530
      'ISO8859-15',
531
      'ISO_8859-15',
532
      'ISO_8859-15:1998',
533
      'LATIN-9',
534
      'ISO-8859-16',
535
      'ISO-IR-226',
536
      'ISO8859-16',
537
      'ISO_8859-16',
538
      'ISO_8859-16:2001',
539
      'L10',
540
      'LATIN10',
541
      'KOI8-R',
542
      'CSKOI8R',
543
      'KOI8-U',
544
      'KOI8-RU',
545
      'CP1250',
546
      'MS-EE',
547
      'WINDOWS-1250',
548
      'CP1251',
549
      'MS-CYRL',
550
      'WINDOWS-1251',
551
      'CP1252',
552
      'MS-ANSI',
553
      'WINDOWS-1252',
554
      'CP1253',
555
      'MS-GREEK',
556
      'WINDOWS-1253',
557
      'CP1254',
558
      'MS-TURK',
559
      'WINDOWS-1254',
560
      'CP1255',
561
      'MS-HEBR',
562
      'WINDOWS-1255',
563
      'CP1256',
564
      'MS-ARAB',
565
      'WINDOWS-1256',
566
      'CP1257',
567
      'WINBALTRIM',
568
      'WINDOWS-1257',
569
      'CP1258',
570
      'WINDOWS-1258',
571
      '850',
572
      'CP850',
573
      'IBM850',
574
      'CSPC850MULTILINGUAL',
575
      '862',
576
      'CP862',
577
      'IBM862',
578
      'CSPC862LATINHEBREW',
579
      '866',
580
      'CP866',
581
      'IBM866',
582
      'CSIBM866',
583
      'MAC',
584
      'MACINTOSH',
585
      'MACROMAN',
586
      'CSMACINTOSH',
587
      'MACCENTRALEUROPE',
588
      'MACICELAND',
589
      'MACCROATIAN',
590
      'MACROMANIA',
591
      'MACCYRILLIC',
592
      'MACUKRAINE',
593
      'MACGREEK',
594
      'MACTURKISH',
595
      'MACHEBREW',
596
      'MACARABIC',
597
      'MACTHAI',
598
      'HP-ROMAN8',
599
      'R8',
600
      'ROMAN8',
601
      'CSHPROMAN8',
602
      'NEXTSTEP',
603
      'ARMSCII-8',
604
      'GEORGIAN-ACADEMY',
605
      'GEORGIAN-PS',
606
      'KOI8-T',
607
      'CP154',
608
      'CYRILLIC-ASIAN',
609
      'PT154',
610
      'PTCP154',
611
      'CSPTCP154',
612
      'KZ-1048',
613
      'RK1048',
614
      'STRK1048-2002',
615
      'CSKZ1048',
616
      'MULELAO-1',
617
      'CP1133',
618
      'IBM-CP1133',
619
      'ISO-IR-166',
620
      'TIS-620',
621
      'TIS620',
622
      'TIS620-0',
623
      'TIS620.2529-1',
624
      'TIS620.2533-0',
625
      'TIS620.2533-1',
626
      'CP874',
627
      'WINDOWS-874',
628
      'VISCII',
629
      'VISCII1.1-1',
630
      'CSVISCII',
631
      'TCVN',
632
      'TCVN-5712',
633
      'TCVN5712-1',
634
      'TCVN5712-1:1993',
635
      'ISO-IR-14',
636
      'ISO646-JP',
637
      'JIS_C6220-1969-RO',
638
      'JP',
639
      'CSISO14JISC6220RO',
640
      'JISX0201-1976',
641
      'JIS_X0201',
642
      'X0201',
643
      'CSHALFWIDTHKATAKANA',
644
      'ISO-IR-87',
645
      'JIS0208',
646
      'JIS_C6226-1983',
647
      'JIS_X0208',
648
      'JIS_X0208-1983',
649
      'JIS_X0208-1990',
650
      'X0208',
651
      'CSISO87JISX0208',
652
      'ISO-IR-159',
653
      'JIS_X0212',
654
      'JIS_X0212-1990',
655
      'JIS_X0212.1990-0',
656
      'X0212',
657
      'CSISO159JISX02121990',
658
      'CN',
659
      'GB_1988-80',
660
      'ISO-IR-57',
661
      'ISO646-CN',
662
      'CSISO57GB1988',
663
      'CHINESE',
664
      'GB_2312-80',
665
      'ISO-IR-58',
666
      'CSISO58GB231280',
667
      'CN-GB-ISOIR165',
668
      'ISO-IR-165',
669
      'ISO-IR-149',
670
      'KOREAN',
671
      'KSC_5601',
672
      'KS_C_5601-1987',
673
      'KS_C_5601-1989',
674
      'CSKSC56011987',
675
      'EUC-JP',
676
      'EUCJP',
677
      'EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE',
678
      'CSEUCPKDFMTJAPANESE',
679
      'MS_KANJI',
680
      'SHIFT-JIS',
681
      'SHIFT_JIS',
682
      'SJIS',
683
      'CSSHIFTJIS',
684
      'CP932',
685
      'ISO-2022-JP',
686
      'CSISO2022JP',
687
      'ISO-2022-JP-1',
688
      'ISO-2022-JP-2',
689
      'CSISO2022JP2',
690
      'CN-GB',
691
      'EUC-CN',
692
      'EUCCN',
693
      'GB2312',
694
      'CSGB2312',
695
      'GBK',
696
      'CP936',
697
      'MS936',
698
      'WINDOWS-936',
699
      'GB18030',
700
      'ISO-2022-CN',
701
      'CSISO2022CN',
702
      'ISO-2022-CN-EXT',
703
      'HZ',
704
      'HZ-GB-2312',
705
      'EUC-TW',
706
      'EUCTW',
707
      'CSEUCTW',
708
      'BIG-5',
709
      'BIG-FIVE',
710
      'BIG5',
711
      'BIGFIVE',
712
      'CN-BIG5',
713
      'CSBIG5',
714
      'CP950',
715
      'BIG5-HKSCS:1999',
716
      'BIG5-HKSCS:2001',
717
      'BIG5-HKSCS',
718
      'BIG5-HKSCS:2004',
719
      'BIG5HKSCS',
720
      'EUC-KR',
721
      'EUCKR',
722
      'CSEUCKR',
723
      'CP949',
724
      'UHC',
725
      'CP1361',
726
      'JOHAB',
727
      'ISO-2022-KR',
728
      'CSISO2022KR',
729
      'CP856',
730
      'CP922',
731
      'CP943',
732
      'CP1046',
733
      'CP1124',
734
      'CP1129',
735
      'CP1161',
736
      'IBM-1161',
737
      'IBM1161',
738
      'CSIBM1161',
739
      'CP1162',
740
      'IBM-1162',
741
      'IBM1162',
742
      'CSIBM1162',
743
      'CP1163',
744
      'IBM-1163',
745
      'IBM1163',
746
      'CSIBM1163',
747
      'DEC-KANJI',
748
      'DEC-HANYU',
749
      '437',
750
      'CP437',
751
      'IBM437',
752
      'CSPC8CODEPAGE437',
753
      'CP737',
754
      'CP775',
755
      'IBM775',
756
      'CSPC775BALTIC',
757
      '852',
758
      'CP852',
759
      'IBM852',
760
      'CSPCP852',
761
      'CP853',
762
      '855',
763
      'CP855',
764
      'IBM855',
765
      'CSIBM855',
766
      '857',
767
      'CP857',
768
      'IBM857',
769
      'CSIBM857',
770
      'CP858',
771
      '860',
772
      'CP860',
773
      'IBM860',
774
      'CSIBM860',
775
      '861',
776
      'CP-IS',
777
      'CP861',
778
      'IBM861',
779
      'CSIBM861',
780
      '863',
781
      'CP863',
782
      'IBM863',
783
      'CSIBM863',
784
      'CP864',
785
      'IBM864',
786
      'CSIBM864',
787
      '865',
788
      'CP865',
789
      'IBM865',
790
      'CSIBM865',
791
      '869',
792
      'CP-GR',
793
      'CP869',
794
      'IBM869',
795
      'CSIBM869',
796
      'CP1125',
797
      'EUC-JISX0213',
798
      'SHIFT_JISX0213',
799
      'ISO-2022-JP-3',
800
      'BIG5-2003',
801
      'ISO-IR-230',
802
      'TDS565',
803
      'ATARI',
804
      'ATARIST',
805
      'RISCOS-LATIN1',
806
  );
807
808
  /**
809
   * @var array
810
   */
811
  private static $SUPPORT = array();
812
813
  /**
   * Creating an instance triggers the one-time environment detection
   * performed by UTF8::checkForSupport().
   */
  public function __construct()
  {
    self::checkForSupport();
  }
820
821
  /**
   * Fetch one character by its position, similar to $str[1] but aware of
   * multi-byte characters.
   *
   * @param string $str <p>A UTF-8 string.</p>
   * @param int    $pos <p>Zero-based position of the wanted character.</p>
   *
   * @return string <p>The character at that position; an empty string if the
   *                input is empty or the position is negative.</p>
   */
  public static function access($str, $pos)
  {
    $str = (string)$str;
    if ($str === '') {
      return '';
    }

    $pos = (int)$pos;

    // negative positions are not supported here
    return $pos < 0 ? '' : (string)self::substr($str, $pos, 1);
  }
845
846
  /**
   * Make sure the string starts with the UTF-8 byte order mark.
   *
   * INFO: A string for which UTF8::string_has_bom() does not report false
   *       is handed back untouched.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The string, prefixed with the BOM where it was missing.</p>
   */
  public static function add_bom_to_string($str)
  {
    if (self::string_has_bom($str) === false) {
      return self::bom() . $str;
    }

    return $str;
  }
863
864
  /**
   * Convert a string of binary digits ("0" / "1") into the bytes it encodes.
   *
   * The digits are read as one big-endian number: leading zeros carry no
   * value and characters other than "0" and "1" are ignored. The bits are
   * left-padded to whole bytes and converted octet by octet, so the result
   * is correct for inputs of any length (the former base_convert() + pack()
   * approach lost precision on long inputs and produced shifted bytes when
   * the hexadecimal form had an odd number of digits).
   *
   * @param mixed $bin <p>The binary digits, e.g. "01100001".</p>
   *
   * @return string <p>The decoded bytes; an empty string for empty input.</p>
   */
  public static function binary_to_str($bin)
  {
    if (!isset($bin[0])) {
      return '';
    }

    // keep only binary digits and drop the insignificant leading zeros
    $bits = ltrim((string)preg_replace('/[^01]/', '', (string)$bin), '0');
    if ($bits === '') {
      // the value zero is still represented by one NUL byte
      $bits = '0';
    }

    // left-pad to a multiple of 8 bits so the input splits into whole bytes
    $paddedLength = (int)(ceil(strlen($bits) / 8) * 8);
    $padded = str_pad($bits, $paddedLength, '0', STR_PAD_LEFT);

    $str = '';
    foreach (str_split($padded, 8) as $octet) {
      $str .= chr(bindec($octet));
    }

    return $str;
  }
879
880
  /**
   * Get the byte order mark used for UTF-8.
   *
   * INFO: the marks of other encodings (e.g. UTF-16 and UTF-32) are listed
   *       in UTF8::$BOM
   *
   * @return string <p>The three bytes 0xEF 0xBB 0xBF.</p>
   */
  public static function bom()
  {
    $utf8Bom = "\xef\xbb\xbf";

    return $utf8Bom;
  }
891
892
  /**
   * Alias of UTF8::chr_map(): both arguments are passed through unchanged.
   *
   * @see   UTF8::chr_map()
   *
   * @param string|array $callback <p>The callback function.</p>
   * @param string       $str      <p>The string whose characters are processed.</p>
   *
   * @return array <p>Whatever UTF8::chr_map() returns.</p>
   */
  public static function callback($callback, $str)
  {
    $mapped = self::chr_map($callback, $str);

    return $mapped;
  }
906
907
  /**
   * Probe the PHP environment once and remember the results in self::$SUPPORT.
   *
   * The following keys are filled: "mbstring", "mbstring_func_overload",
   * "iconv", "intl", "intl__transliterator_list_ids", "intlChar" and
   * "pcre_utf8". The key "already_checked_via_portable_utf8" marks the probe
   * as done, so later calls return immediately.
   *
   * INFO: There is no need to call this manually, it is triggered on demand.
   */
  public static function checkForSupport()
  {
    // the probe runs only once, afterwards the stored results are reused
    if (isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      return;
    }

    self::$SUPPORT['already_checked_via_portable_utf8'] = true;

    // http://php.net/manual/en/book.mbstring.php
    self::$SUPPORT['mbstring'] = self::mbstring_loaded();

    // is the string-function overloading of mbstring switched on?
    self::$SUPPORT['mbstring_func_overload'] = (bool)(
        defined('MB_OVERLOAD_STRING')
        &&
        (ini_get('mbstring.func_overload') & MB_OVERLOAD_STRING)
    );

    // http://php.net/manual/en/book.iconv.php
    self::$SUPPORT['iconv'] = self::iconv_loaded();

    // http://php.net/manual/en/book.intl.php
    self::$SUPPORT['intl'] = self::intl_loaded();

    // the list of transliterator ids is only fetched when "intl" provides it
    $canListTransliterators = self::$SUPPORT['intl'] === true
                              &&
                              function_exists('transliterator_list_ids') === true;
    self::$SUPPORT['intl__transliterator_list_ids'] = $canListTransliterators
        ? transliterator_list_ids()
        : array();

    // http://php.net/manual/en/class.intlchar.php
    self::$SUPPORT['intlChar'] = self::intlChar_loaded();

    // http://php.net/manual/en/book.pcre.php
    self::$SUPPORT['pcre_utf8'] = self::pcre_utf8_support();
  }
952
953
  /**
   * Generates a UTF-8 encoded character from the given code point.
   *
   * INFO: opposite to UTF8::ord()
   *
   * For the default encoding the work is delegated to \IntlChar::chr() when
   * self::$SUPPORT['intlChar'] is true. Otherwise the UTF-8 bytes are built
   * manually, converted via mb_convert_encoding() if another encoding was
   * requested, and kept in a static per-request cache.
   *
   * @param int    $code_point <p>The code point for which to generate a character.</p>
   * @param string $encoding   [optional] <p>Default is UTF-8</p>
   *
   * @return string|null <p>Multi-Byte character, returns null on failure or empty input.
   *                     In the manual path null is returned for a non-integer code point
   *                     and for a code point outside of 0 - 0x10FFFF.</p>
   */
  public static function chr($code_point, $encoding = 'UTF-8')
  {
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    } elseif (self::$SUPPORT['intlChar'] === true) {
      return \IntlChar::chr($code_point);
    }

    // check type of code_point, only if there is no support for "\IntlChar"
    if ((int)$code_point !== $code_point) {
      return null;
    }

    // Values outside of the Unicode range cannot be encoded: without this
    // check the shifts below would produce invalid bytes (e.g. chr(-1)).
    if ($code_point < 0 || $code_point > 0x10FFFF) {
      return null;
    }

    // use static cache, only if there is no support for "\IntlChar"
    static $CHAR_CACHE = array();
    // the separator keeps the numeric part and the encoding name apart
    $cacheKey = $code_point . '|' . $encoding;
    if (isset($CHAR_CACHE[$cacheKey]) === true) {
      return $CHAR_CACHE[$cacheKey];
    }

    if ($code_point <= 0x7F) {
      // 1 byte: 0xxxxxxx
      $str = self::chr_and_parse_int($code_point);
    } elseif ($code_point <= 0x7FF) {
      // 2 bytes: 110xxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 6) + 0xC0) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } elseif ($code_point <= 0xFFFF) {
      // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 12) + 0xE0) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    } else {
      // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      $str = self::chr_and_parse_int(($code_point >> 18) + 0xF0) .
             self::chr_and_parse_int((($code_point >> 12) & 0x3F) + 0x80) .
             self::chr_and_parse_int((($code_point >> 6) & 0x3F) + 0x80) .
             self::chr_and_parse_int(($code_point & 0x3F) + 0x80);
    }

    if ($encoding !== 'UTF-8') {
      $str = \mb_convert_encoding($str, $encoding, 'UTF-8');
    }

    // add into static cache
    $CHAR_CACHE[$cacheKey] = $str;

    return $str;
  }
1010
1011 9
  /**
   * Cast the given value to an integer and hand it to PHP's chr().
   *
   * @param int $int <p>The byte value.</p>
   *
   * @return string <p>The result of chr() for the casted value.</p>
   */
  private static function chr_and_parse_int($int)
  {
    $byteValue = (int)$int;

    return chr($byteValue);
  }
1020
1021 26
  /**
   * Runs a callback over every element that UTF8::split() produces for the string.
   *
   * @param string|array $callback <p>The callback function; array_map() calls it with one element per call.</p>
   * @param string       $str      <p>UTF-8 string to run the callback on.</p>
   *
   * @return array <p>The callback results, one per element returned by UTF8::split().</p>
   */
  public static function chr_map($callback, $str)
  {
    return array_map($callback, self::split($str));
  }
1035
1036 1
  /**
   * Builds a list with the byte length of each character of a Unicode string.
   *
   * 1 byte => U+0000  - U+007F
   * 2 byte => U+0080  - U+07FF
   * 3 byte => U+0800  - U+FFFF
   * 4 byte => U+10000 - U+10FFFF
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return array <p>The byte length of every element returned by UTF8::split(); an empty array for an empty
   *               string.</p>
   */
  public static function chr_size_list($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return array();
    }

    // measure every split element in bytes ('8BIT'), not in characters
    $byteLength = function ($char) {
      return self::strlen($char, '8BIT');
    };

    return array_map($byteLength, self::split($str));
  }
1063 4
1064 4
  /**
   * Get a decimal code representation of a specific character.
   *
   * The first byte decides how many bytes belong to the sequence (1 to 4); the payload bits
   * of the leading byte and of the following continuation bytes are then combined.
   *
   * @param string $char <p>The input character.</p>
   *
   * @return int <p>The decoded value; 0 for an empty string. Continuation bytes that are missing
   *             from a truncated sequence contribute zero bits.</p>
   */
  public static function chr_to_decimal($char)
  {
    $char = (string)$char;

    // an empty string has no leading byte to inspect
    if (!isset($char[0])) {
      return 0;
    }

    $code = self::ord($char[0]);

    if (!($code & 0x80)) {
      // 0xxxxxxx
      return $code;
    }

    if (($code & 0xe0) === 0xc0) {
      // 110xxxxx
      $bytes = 2;
      $code &= ~0xc0;
    } elseif (($code & 0xf0) === 0xe0) {
      // 1110xxxx
      $bytes = 3;
      $code &= ~0xe0;
    } elseif (($code & 0xf8) === 0xf0) {
      // 11110xxx
      $bytes = 4;
      $code &= ~0xf0;
    } else {
      // not a recognised leading byte: the value of the first byte is returned unchanged
      $bytes = 1;
    }

    for ($i = 1; $i < $bytes; $i++) {
      // 10xxxxxx
      // never read past the end of the string when the sequence is truncated
      $continuation = isset($char[$i]) ? (self::ord($char[$i]) & ~0x80) : 0;
      $code = ($code << 6) + $continuation;
    }

    return $code;
  }
1103
1104 2
  /**
   * Get hexadecimal code point (U+xxxx) of a UTF-8 encoded character.
   *
   * The literal string "&#0;" is replaced by an empty string before the code point is looked up.
   *
   * @param string $char <p>The input character</p>
   * @param string $pfix [optional] <p>Passed through to UTF8::int_to_hex().</p>
   *
   * @return string <p>The result of UTF8::int_to_hex(); an empty string for empty input.</p>
   */
  public static function chr_to_hex($char, $pfix = 'U+')
  {
    $char = (string)$char;

    if ($char === '') {
      return '';
    }

    $input = ($char === '&#0;') ? '' : $char;
    $codePoint = self::ord($input);

    return self::int_to_hex($codePoint, $pfix);
  }
1126
1127 1
  /**
   * Alias for "UTF8::chr_to_decimal()".
   *
   * @see UTF8::chr_to_decimal()
   *
   * @param string $chr <p>Forwarded unchanged to UTF8::chr_to_decimal().</p>
   *
   * @return int <p>Whatever UTF8::chr_to_decimal() returns.</p>
   */
  public static function chr_to_int($chr)
  {
    $decimal = self::chr_to_decimal($chr);

    return $decimal;
  }
1140
1141 1
  /**
   * Splits a string into smaller chunks and joins them with the specified line ending character(s).
   *
   * The chunks come from UTF8::split() and are glued together with implode(), so the line ending
   * is placed between chunks and none is appended after the last chunk.
   *
   * @param string $body     <p>The original string to be split.</p>
   * @param int    $chunklen [optional] <p>The chunk length handed to UTF8::split().</p>
   * @param string $end      [optional] <p>The character(s) inserted between two chunks.</p>
   *
   * @return string <p>The chunked string</p>
   */
  public static function chunk_split($body, $chunklen = 76, $end = "\r\n")
  {
    $chunks = self::split($body, $chunklen);

    return implode($end, $chunks);
  }
1154
1155 1
  /**
   * Accepts a string and removes all non-UTF-8 characters from it + extras if needed.
   *
   * Steps, in order: strip bytes that are not part of a well-formed sequence, remove the
   * replacement character via UTF8::replace_diamond_question_mark(), remove invisible characters,
   * then apply the optional normalizations selected by the flags.
   *
   * @param string $str                     <p>The string to be sanitized.</p>
   * @param bool   $remove_bom              [optional] <p>Set to true, if you need to remove UTF-BOM.</p>
   * @param bool   $normalize_whitespace    [optional] <p>Set to true, if you need to normalize the whitespace.</p>
   * @param bool   $normalize_msword        [optional] <p>Set to true, if you need to normalize MS Word chars e.g.: "…"
   *                                        => "..."</p>
   * @param bool   $keep_non_breaking_space [optional] <p>Set to true, to keep non-breaking-spaces, in combination with
   *                                        $normalize_whitespace</p>
   *
   * @return string <p>Clean UTF-8 encoded string.</p>
   */
  public static function clean($str, $remove_bom = false, $normalize_whitespace = false, $normalize_msword = false, $keep_non_breaking_space = false)
  {
    // http://stackoverflow.com/questions/1401317/remove-non-utf8-characters-from-string
    // caused connection reset problem on larger strings

    $validSequences = '/
      (
        (?: [\x00-\x7F]               # single-byte sequences   0xxxxxxx
        |   [\xC0-\xDF][\x80-\xBF]    # double-byte sequences   110xxxxx 10xxxxxx
        |   [\xE0-\xEF][\x80-\xBF]{2} # triple-byte sequences   1110xxxx 10xxxxxx * 2
        |   [\xF0-\xF7][\x80-\xBF]{3} # quadruple-byte sequence 11110xxx 10xxxxxx * 3
        ){1,100}                      # ...one or more times
      )
    | ( [\x80-\xBF] )                 # invalid byte in range 10000000 - 10111111
    | ( [\xC0-\xFF] )                 # invalid byte in range 11000000 - 11111111
    /x';

    // only group 1 (the well-formed runs) is kept; stray bytes matched by the other groups are dropped
    $cleaned = preg_replace($validSequences, '$1', $str);

    $cleaned = self::remove_invisible_characters(
        self::replace_diamond_question_mark($cleaned, '')
    );

    // the flags are compared strictly: only a real boolean true switches a step on
    if ($normalize_whitespace === true) {
      $cleaned = self::normalize_whitespace($cleaned, $keep_non_breaking_space);
    }

    if ($normalize_msword === true) {
      $cleaned = self::normalize_msword($cleaned);
    }

    if ($remove_bom === true) {
      $cleaned = self::remove_bom($cleaned);
    }

    return $cleaned;
  }
1203
1204 56
  /**
   * Cleans up a string: fixes simple UTF-8 encoding errors first, then runs UTF8::clean() on the result.
   *
   * UTF8::clean() is called with BOM removal and whitespace normalization switched on, MS Word
   * normalization switched off, and non-breaking spaces kept.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The cleaned string; an empty string for empty input.</p>
   */
  public static function cleanup($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // fixed ISO <-> UTF-8 Errors
    $fixed = self::fix_simple_utf8($str);

    // clean($str, $remove_bom, $normalize_whitespace, $normalize_msword, $keep_non_breaking_space)
    return (string)self::clean($fixed, true, true, false, true);
  }
1231
1232 21
  /**
   * Accepts a string or an array of strings and returns an array of Unicode code points.
   *
   * INFO: opposite to UTF8::string()
   *
   * @param string|string[] $arg        <p>A UTF-8 encoded string or an array of such strings. A string is split
   *                                    with UTF8::split() first; an array is used as given.</p>
   * @param bool            $u_style    <p>If truthy, every code point is additionally passed through
   *                                    UTF8::int_to_hex(); by default the results of UTF8::ord() are returned.</p>
   *
   * @return array <p>The array of code points.</p>
   */
  public static function codepoints($arg, $u_style = false)
  {
    $class = '\\voku\\helper\\UTF8';

    $chars = is_string($arg) === true ? self::split($arg) : $arg;
    $points = array_map(array($class, 'ord'), $chars);

    if (!$u_style) {
      return $points;
    }

    // array_map() passes a single argument, so int_to_hex() runs with its default prefix
    return array_map(array($class, 'int_to_hex'), $points);
  }
1269
1270 7
  /**
   * Returns count of characters used in a string.
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $cleanUtf8 [optional] <p>Passed on to UTF8::split() as its third argument.</p>
   *
   * @return array <p>An associative array of Character as keys and
   *               their count as values.</p>
   */
  public static function count_chars($str, $cleanUtf8 = false)
  {
    // split into chunks of length 1, then tally identical chunks
    $chars = self::split($str, 1, $cleanUtf8);

    return array_count_values($chars);
  }
1283
1284 7
  /**
   * Converts an int-value into a UTF-8 character.
   *
   * The value is wrapped into a numeric HTML entity ("&#...;") and decoded with
   * UTF8::html_entity_decode().
   *
   * @param mixed $int <p>The value that is placed between "&#" and ";".</p>
   *
   * @return string <p>The result of UTF8::html_entity_decode().</p>
   */
  public static function decimal_to_chr($int)
  {
    // ENT_HTML5 is only referenced when Bootup::is_php('5.4') reports true
    $flags = Bootup::is_php('5.4') === true ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;

    return self::html_entity_decode('&#' . $int . ';', $flags);
  }
1301
1302 5
  /**
   * Encode a string with a new charset-encoding.
   *
   * INFO:  The difference to "UTF8::utf8_encode()" is that this function also tries to fix broken / double encoding,
   *        so you can call this function also on a UTF-8 string and you don't mess up the string.
   *
   * @param string $encoding <p>e.g. 'UTF-16', 'UTF-8', 'ISO-8859-1', etc.</p>
   * @param string $str      <p>The input string</p>
   * @param bool   $force    [optional] <p>Force the new encoding (we try to fix broken / double encoding for
   *                         UTF-8)<br> otherwise the string is only converted when the detected encoding
   *                         differs from the requested one</p>
   *
   * @return string <p>The converted string, or the input string when nothing was converted.</p>
   */
  public static function encode($encoding, $str, $force = true)
  {
    $str = (string)$str;
    $encoding = (string)$encoding;

    // nothing to do for an empty string or an empty target encoding
    if ($str === '' || $encoding === '') {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    $encodingDetected = self::str_detect_encoding($str);

    // detection failed: leave the string untouched
    if ($encodingDetected === false) {
      return $str;
    }

    // not forced and already in the requested encoding: leave the string untouched
    if ($force !== true && $encodingDetected === $encoding) {
      return $str;
    }

    // the dedicated UTF-8 / ISO-8859-1 converters are used when forced,
    // or when the source is one of these three encodings
    $useDedicatedConverter = $force === true
                             || in_array($encodingDetected, array('UTF-8', 'WINDOWS-1252', 'ISO-8859-1'), true);

    if ($useDedicatedConverter === true) {
      if ($encoding === 'UTF-8') {
        return self::to_utf8($str);
      }

      if ($encoding === 'ISO-8859-1') {
        return self::to_iso8859($str);
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::encode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    $strEncoded = \mb_convert_encoding($str, $encoding, $encodingDetected);

    // fall back to the input when the conversion produced a falsy result
    return $strEncoded ? $strEncoded : $str;
  }
1393 1
1394
  /**
1395
   * Reads entire file into a string.
1396
   *
1397
   * WARNING: do not use UTF-8 Option ($convertToUtf8) for binary-files (e.g.: images) !!!
1398
   *
1399
   * @link http://php.net/manual/en/function.file-get-contents.php
1400
   *
1401
   * @param string        $filename      <p>
1402
   *                                     Name of the file to read.
1403
   *                                     </p>
1404
   * @param int|false     $flags         [optional] <p>
1405
   *                                     Prior to PHP 6, this parameter is called
1406
   *                                     use_include_path and is a bool.
1407
   *                                     As of PHP 5 the FILE_USE_INCLUDE_PATH can be used
1408
   *                                     to trigger include path
1409
   *                                     search.
1410
   *                                     </p>
1411
   *                                     <p>
1412
   *                                     The value of flags can be any combination of
1413
   *                                     the following flags (with some restrictions), joined with the
1414
   *                                     binary OR (|)
1415
   *                                     operator.
1416
   *                                     </p>
1417
   *                                     <p>
1418
   *                                     <table>
1419
   *                                     Available flags
1420
   *                                     <tr valign="top">
1421
   *                                     <td>Flag</td>
1422
   *                                     <td>Description</td>
1423
   *                                     </tr>
1424
   *                                     <tr valign="top">
1425
   *                                     <td>
1426
   *                                     FILE_USE_INCLUDE_PATH
1427
   *                                     </td>
1428
   *                                     <td>
1429
   *                                     Search for filename in the include directory.
1430
   *                                     See include_path for more
1431
   *                                     information.
1432
   *                                     </td>
1433
   *                                     </tr>
1434
   *                                     <tr valign="top">
1435
   *                                     <td>
1436
   *                                     FILE_TEXT
1437
   *                                     </td>
1438
   *                                     <td>
1439
   *                                     As of PHP 6, the default encoding of the read
1440
   *                                     data is UTF-8. You can specify a different encoding by creating a
1441
   *                                     custom context or by changing the default using
1442
   *                                     stream_default_encoding. This flag cannot be
1443
   *                                     used with FILE_BINARY.
1444
   *                                     </td>
1445
   *                                     </tr>
1446
   *                                     <tr valign="top">
1447
   *                                     <td>
1448
   *                                     FILE_BINARY
1449
   *                                     </td>
1450
   *                                     <td>
1451
   *                                     With this flag, the file is read in binary mode. This is the default
1452
   *                                     setting and cannot be used with FILE_TEXT.
1453
   *                                     </td>
1454
   *                                     </tr>
1455
   *                                     </table>
1456
   *                                     </p>
1457
   * @param resource|null $context       [optional] <p>
1458
   *                                     A valid context resource created with
1459
   *                                     stream_context_create. If you don't need to use a
1460
   *                                     custom context, you can skip this parameter by &null;.
1461
   *                                     </p>
1462
   * @param int|null $offset             [optional] <p>
1463
   *                                     The offset where the reading starts.
1464
   *                                     </p>
1465
   * @param int|null $maxLength          [optional] <p>
1466
   *                                     Maximum length of data read. The default is to read until end
1467
   *                                     of file is reached.
1468
   *                                     </p>
1469
   * @param int      $timeout            <p>The time in seconds for the timeout.</p>
1470
   *
1471
   * @param boolean  $convertToUtf8      <strong>WARNING!!!</strong> <p>Maybe you can't use this option for e.g. images
1472
   *                                     or pdf, because they used non default utf-8 chars</p>
1473
   *
1474
   * @return string|false <p>The function returns the read data or false on failure.</p>
1475
   */
1476
  public static function file_get_contents($filename, $flags = null, $context = null, $offset = null, $maxLength = null, $timeout = 10, $convertToUtf8 = true)
  {
    // init
    $timeout = (int)$timeout;
    $filename = filter_var($filename, FILTER_SANITIZE_STRING);

    // filter_var() returns false when the filter fails (e.g. for a non-scalar value);
    // report the failure instead of handing false to the native file_get_contents()
    if ($filename === false) {
      return false;
    }

    // without a caller-supplied context, build one that carries the http timeout
    if ($timeout && $context === null) {
      $context = stream_context_create(
          array(
              'http' =>
                  array(
                      'timeout' => $timeout,
                  ),
          )
      );
    }

    if (!$flags) {
      $flags = false;
    }

    if ($offset === null) {
      $offset = 0;
    }

    // the length argument is only passed on when it is a real integer
    if (is_int($maxLength) === true) {
      $data = file_get_contents($filename, $flags, $context, $offset, $maxLength);
    } else {
      $data = file_get_contents($filename, $flags, $context, $offset);
    }

    // return false on error
    if ($data === false) {
      return false;
    }

    if ($convertToUtf8 === true) {
      // not forced: encode() is called with $force = false
      $data = self::encode('UTF-8', $data, false);
      $data = self::cleanup($data);
    }

    return $data;
  }
1519 2
1520
  /**
   * Checks if a file starts with BOM (Byte Order Mark) character.
   *
   * The file is read with the native file_get_contents() and the content is handed to
   * UTF8::string_has_bom().
   *
   * @param string $file_path <p>Path to a valid file.</p>
   *
   * @return bool <p><strong>true</strong> if the file has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function file_has_bom($file_path)
  {
    $content = file_get_contents($file_path);

    return self::string_has_bom($content);
  }
1531 1
1532
  /**
   * Normalizes strings to the given Unicode normalization form (NFC by default).
   *
   * Arrays and objects are walked recursively; values of any other non-string type are returned
   * unchanged. For strings, "\r\n" and "\r" are replaced by "\n" first. Non-ASCII strings that are
   * not yet normalized are passed to \Normalizer::normalize(); when that yields no usable result,
   * the string is run through UTF8::encode('UTF-8', ...) instead.
   *
   * @param mixed  $var                <p>The value to filter.</p>
   * @param int    $normalization_form <p>The form passed to \Normalizer.</p>
   * @param string $leading_combining  <p>Prepended when the resulting string starts with a
   *                                   combining mark (\p{Mn}).</p>
   *
   * @return mixed <p>The filtered value.</p>
   */
  public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
  {
    $type = gettype($var);

    if ($type === 'array') {
      foreach ($var as $key => $value) {
        /** @noinspection AlterInForeachInspection */
        $var[$key] = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type === 'object') {
      foreach ($var as $key => $value) {
        $var->{$key} = self::filter($value, $normalization_form, $leading_combining);
      }

      return $var;
    }

    if ($type !== 'string') {
      return $var;
    }

    if (strpos($var, "\r") !== false) {
      // Workaround https://bugs.php.net/65732
      $var = str_replace(array("\r\n", "\r"), "\n", $var);
    }

    // pure ASCII needs no normalization
    if (self::is_ascii($var) !== false) {
      return $var;
    }

    /** @noinspection PhpUndefinedClassInspection */
    if (\Normalizer::isNormalized($var, $normalization_form)) {
      // placeholder, so that the isset() check further down succeeds
      $normalized = '-';
    } else {
      /** @noinspection PhpUndefinedClassInspection */
      $normalized = \Normalizer::normalize($var, $normalization_form);

      $var = isset($normalized[0]) ? $normalized : self::encode('UTF-8', $var, true);
    }

    $startsWithCombiningMark = $var[0] >= "\x80"
                               && isset($normalized[0], $leading_combining[0])
                               && preg_match('/^\p{Mn}/u', $var);

    if ($startsWithCombiningMark) {
      // Prevent leading combining chars
      // for NFC-safe concatenations.
      $var = $leading_combining . $var;
    }

    return $var;
  }
1595 9
1596
  /**
1597
   * "filter_input()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1598
   *
1599
   * Gets a specific external variable by name and optionally filters it
1600
   *
1601
   * @link  http://php.net/manual/en/function.filter-input.php
1602
   *
1603
   * @param int    $type          <p>
1604
   *                              One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1605
   *                              <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1606
   *                              <b>INPUT_ENV</b>.
1607
   *                              </p>
1608
   * @param string $variable_name <p>
1609
   *                              Name of a variable to get.
1610
   *                              </p>
1611
   * @param int    $filter        [optional] <p>
1612
   *                              The ID of the filter to apply. The
1613
   *                              manual page lists the available filters.
1614
   *                              </p>
1615
   * @param mixed  $options       [optional] <p>
1616
   *                              Associative array of options or bitwise disjunction of flags. If filter
1617
   *                              accepts options, flags can be provided in "flags" field of array.
1618
   *                              </p>
1619
   *
1620
   * @return mixed Value of the requested variable on success, <b>FALSE</b> if the filter fails,
1621
   * or <b>NULL</b> if the <i>variable_name</i> variable is not set.
1622
   * If the flag <b>FILTER_NULL_ON_FAILURE</b> is used, it
1623
   * returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter fails.
1624
   * @since 5.2.0
1625
   */
1626 View Code Duplication
  public static function filter_input($type, $variable_name, $filter = FILTER_DEFAULT, $options = null)
  {
    // $options is only forwarded to the native filter_input() when the caller supplied a fourth argument
    if (func_num_args() >= 4) {
      $raw = filter_input($type, $variable_name, $filter, $options);
    } else {
      $raw = filter_input($type, $variable_name, $filter);
    }

    // run the native result through UTF8::filter()
    return self::filter($raw);
  }
1636
1637
  /**
1638
   * "filter_input_array()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1639
   *
1640
   * Gets external variables and optionally filters them
1641
   *
1642
   * @link  http://php.net/manual/en/function.filter-input-array.php
1643
   *
1644
   * @param int   $type       <p>
1645
   *                          One of <b>INPUT_GET</b>, <b>INPUT_POST</b>,
1646
   *                          <b>INPUT_COOKIE</b>, <b>INPUT_SERVER</b>, or
1647
   *                          <b>INPUT_ENV</b>.
1648
   *                          </p>
1649
   * @param mixed $definition [optional] <p>
1650
   *                          An array defining the arguments. A valid key is a string
1651
   *                          containing a variable name and a valid value is either a filter type, or an array
1652
   *                          optionally specifying the filter, flags and options. If the value is an
1653
   *                          array, valid keys are filter which specifies the
1654
   *                          filter type,
1655
   *                          flags which specifies any flags that apply to the
1656
   *                          filter, and options which specifies any options that
1657
   *                          apply to the filter. See the example below for a better understanding.
1658
   *                          </p>
1659
   *                          <p>
1660
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1661
   *                          input array are filtered by this filter.
1662
   *                          </p>
1663
   * @param bool  $add_empty  [optional] <p>
1664
   *                          Add missing keys as <b>NULL</b> to the return value.
1665
   *                          </p>
1666
   *
1667
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1668
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1669
   * the variable is not set. Or if the flag <b>FILTER_NULL_ON_FAILURE</b>
1670
   * is used, it returns <b>FALSE</b> if the variable is not set and <b>NULL</b> if the filter
1671
   * fails.
1672
   * @since 5.2.0
1673
   */
1674 View Code Duplication
  public static function filter_input_array($type, $definition = null, $add_empty = true)
  {
    // $definition and $add_empty are only forwarded when the caller supplied at least two arguments
    if (func_num_args() >= 2) {
      $values = filter_input_array($type, $definition, $add_empty);
    } else {
      $values = filter_input_array($type);
    }

    // run the native result through UTF8::filter()
    return self::filter($values);
  }
1684
1685
  /**
1686
   * "filter_var()"-wrapper which normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1687
   *
1688
   * Filters a variable with a specified filter
1689
   *
1690
   * @link  http://php.net/manual/en/function.filter-var.php
1691
   *
1692
   * @param mixed $variable <p>
1693
   *                        Value to filter.
1694
   *                        </p>
1695
   * @param int   $filter   [optional] <p>
1696
   *                        The ID of the filter to apply. The
1697
   *                        manual page lists the available filters.
1698
   *                        </p>
1699
   * @param mixed $options  [optional] <p>
1700
   *                        Associative array of options or bitwise disjunction of flags. If filter
1701
   *                        accepts options, flags can be provided in "flags" field of array. For
1702
   *                        the "callback" filter, callable type should be passed. The
1703
   *                        callback must accept one argument, the value to be filtered, and return
1704
   *                        the value after filtering/sanitizing it.
1705
   *                        </p>
1706
   *                        <p>
1707
   *                        <code>
1708
   *                        // for filters that accept options, use this format
1709
   *                        $options = array(
1710
   *                        'options' => array(
1711
   *                        'default' => 3, // value to return if the filter fails
1712
   *                        // other options here
1713
   *                        'min_range' => 0
1714
   *                        ),
1715
   *                        'flags' => FILTER_FLAG_ALLOW_OCTAL,
1716
   *                        );
1717
   *                        $var = filter_var('0755', FILTER_VALIDATE_INT, $options);
1718
   *                        // for filter that only accept flags, you can pass them directly
1719
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN, FILTER_NULL_ON_FAILURE);
1720
   *                        // for filter that only accept flags, you can also pass as an array
1721
   *                        $var = filter_var('oops', FILTER_VALIDATE_BOOLEAN,
1722
   *                        array('flags' => FILTER_NULL_ON_FAILURE));
1723
   *                        // callback validate filter
1724
   *                        function foo($value)
1725
   *                        {
1726
   *                        // Expected format: Surname, GivenNames
1727
   *                        if (strpos($value, ", ") === false) return false;
1728
   *                        list($surname, $givennames) = explode(", ", $value, 2);
1729
   *                        $empty = (empty($surname) || empty($givennames));
1730
   *                        $notstrings = (!is_string($surname) || !is_string($givennames));
1731
   *                        if ($empty || $notstrings) {
1732
   *                        return false;
1733
   *                        } else {
1734
   *                        return $value;
1735
   *                        }
1736
   *                        }
1737
   *                        $var = filter_var('Doe, Jane Sue', FILTER_CALLBACK, array('options' => 'foo'));
1738
   *                        </code>
1739
   *                        </p>
1740
   *
1741
   * @return mixed the filtered data, or <b>FALSE</b> if the filter fails.
1742
   * @since 5.2.0
1743
   */
1744 View Code Duplication
  public static function filter_var($variable, $filter = FILTER_DEFAULT, $options = null)
  {
    // Forward $options to the native filter_var() only when the caller
    // really passed a third argument; otherwise the native function is
    // invoked with two arguments so that it applies its own defaults.
    $filtered = (func_num_args() >= 3)
        ? filter_var($variable, $filter, $options)
        : filter_var($variable, $filter);

    // Post-process the native result with the class' own filter().
    return self::filter($filtered);
  }
1754 1
1755
  /**
1756
   * "filter_var_array()"-wrapper with normalizes to UTF-8 NFC, converting from WINDOWS-1252 when needed.
1757
   *
1758
   * Gets multiple variables and optionally filters them
1759
   *
1760
   * @link  http://php.net/manual/en/function.filter-var-array.php
1761
   *
1762
   * @param array $data       <p>
1763
   *                          An array with string keys containing the data to filter.
1764
   *                          </p>
1765
   * @param mixed $definition [optional] <p>
1766
   *                          An array defining the arguments. A valid key is a string
1767
   *                          containing a variable name and a valid value is either a
1768
   *                          filter type, or an
1769
   *                          array optionally specifying the filter, flags and options.
1770
   *                          If the value is an array, valid keys are filter
1771
   *                          which specifies the filter type,
1772
   *                          flags which specifies any flags that apply to the
1773
   *                          filter, and options which specifies any options that
1774
   *                          apply to the filter. See the example below for a better understanding.
1775
   *                          </p>
1776
   *                          <p>
1777
   *                          This parameter can be also an integer holding a filter constant. Then all values in the
1778
   *                          input array are filtered by this filter.
1779
   *                          </p>
1780
   * @param bool  $add_empty  [optional] <p>
1781
   *                          Add missing keys as <b>NULL</b> to the return value.
1782
   *                          </p>
1783
   *
1784
   * @return mixed An array containing the values of the requested variables on success, or <b>FALSE</b>
1785
   * on failure. An array value will be <b>FALSE</b> if the filter fails, or <b>NULL</b> if
1786
   * the variable is not set.
1787
   * @since 5.2.0
1788
   */
1789 View Code Duplication
  public static function filter_var_array($data, $definition = null, $add_empty = true)
  {
    // With a single argument the native function is called without a
    // definition; as soon as a definition was supplied, both optional
    // arguments are forwarded ($add_empty then carries its default or the
    // caller's value).
    $result = (func_num_args() >= 2)
        ? filter_var_array($data, $definition, $add_empty)
        : filter_var_array($data);

    // Post-process the native result with the class' own filter().
    return self::filter($result);
  }
1799 1
1800
  /**
1801
   * Check if the number of unicode characters is not more than the specified integer.
1802
   *
1803
   * @param string $str      The original string to be checked.
1804
   * @param int    $box_size The size in number of chars to be checked against string.
1805
   *
1806
   * @return bool true if string is less than or equal to $box_size, false otherwise.
1807
   */
1808
  public static function fits_inside($str, $box_size)
  {
    // Length as reported by the class' own strlen().
    $length = self::strlen($str);

    return $length <= $box_size;
  }
1812 1
1813
  /**
1814
   * Try to fix simple broken UTF-8 strings.
1815
   *
1816
   * INFO: Take a look at "UTF8::fix_utf8()" if you need a more advanced fix for broken UTF-8 strings.
1817
   *
1818
   * If you received an UTF-8 string that was converted from Windows-1252 as it was ISO-8859-1
1819
   * (ignoring Windows-1252 chars from 80 to 9F) use this function to fix it.
1820
   * See: http://en.wikipedia.org/wiki/Windows-1252
1821
   *
1822
   * @param string $str <p>The input string</p>
1823
   *
1824
   * @return string
1825
   */
1826 View Code Duplication
  public static function fix_simple_utf8($str)
  {
    $str = (string)$str;

    // nothing to repair in an empty string
    if ($str === '') {
      return '';
    }

    // The search / replace lists are derived from self::$BROKEN_UTF8_FIX
    // only once per request and then re-used on every following call.
    static $FIX_TABLE_CACHE = null;

    if ($FIX_TABLE_CACHE === null) {
      $FIX_TABLE_CACHE = array(
          'search'  => array_keys(self::$BROKEN_UTF8_FIX),
          'replace' => array_values(self::$BROKEN_UTF8_FIX),
      );
    }

    return str_replace($FIX_TABLE_CACHE['search'], $FIX_TABLE_CACHE['replace'], $str);
  }
1845 26
1846
  /**
1847
   * Fix a double (or multiple) encoded UTF8 string.
1848
   *
1849
   * @param string|string[] $str <p>You can use a string or an array of strings.</p>
1850
   *
1851
   * @return string|string[] <p>Will return the fixed input-"array" or
1852
   *                         the fixed input-"string".</p>
1853
   */
1854
  public static function fix_utf8($str)
  {
    // Arrays are processed element by element (recursively); every element
    // is written back in place under its original key.
    if (is_array($str) === true) {
      foreach (array_keys($str) as $key) {
        /** @noinspection OffsetOperationsInspection */
        $str[$key] = self::fix_utf8($str[$key]);
      }

      return $str;
    }

    // Decode and re-encode again and again until a pass no longer changes
    // the string, i.e. until every extra layer of encoding has been removed.
    $previous = '';
    while ($str !== $previous) {
      $previous = $str;

      $decoded = self::utf8_decode($str);
      $str = self::to_utf8($decoded);
    }

    return $str;
  }
1878 1
1879
  /**
1880
   * Get the text direction ('RTL' or 'LTR') of a specific character.
1881
   *
1882
   * @param string $char
1883
   *
1884
   * @return string <p>'RTL' or 'LTR'</p>
1885
   */
1886
  public static function getCharDirection($char)
  {
    // run the feature detection once, on first use
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intlChar'] === true) {
      $intlDirection = \IntlChar::charDirection($char);

      // direction codes from the "IntlChar"-Class
      $ltrCodes = array(0, 11, 12, 20);
      $rtlCodes = array(1, 13, 14, 15, 21);

      if (in_array($intlDirection, $ltrCodes, true)) {
        return 'LTR';
      }

      if (in_array($intlDirection, $rtlCodes, true)) {
        return 'RTL';
      }

      // any other code falls through to the table lookup below
    }

    $c = static::chr_to_decimal($char);

    // everything outside of [0x5be, 0x10b7f] is treated as left-to-right
    if (!(0x5be <= $c && 0x10b7f >= $c)) {
      return 'LTR';
    }

    if (0x85e >= $c) {

      // lower block: 0x5be - 0x85e
      $rtlSingles = array(
          0x5be, 0x5c0, 0x5c3, 0x5c6,
          0x608, 0x60b, 0x60d, 0x61b,
          0x710, 0x7b1, 0x7fa,
          0x81a, 0x824, 0x828, 0x85e,
      );
      $rtlRanges = array(
          array(0x5d0, 0x5ea),
          array(0x5f0, 0x5f4),
          array(0x61e, 0x64a),
          array(0x66d, 0x66f),
          array(0x671, 0x6d5),
          array(0x6e5, 0x6e6),
          array(0x6ee, 0x6ef),
          array(0x6fa, 0x70d),
          array(0x712, 0x72f),
          array(0x74d, 0x7a5),
          array(0x7c0, 0x7ea),
          array(0x7f4, 0x7f5),
          array(0x800, 0x815),
          array(0x830, 0x83e),
          array(0x840, 0x858),
      );

    } elseif (0x200f === $c) {

      return 'RTL';

    } elseif (0xfb1d <= $c) {

      // upper block: 0xfb1d - 0x10b7f
      $rtlSingles = array(
          0xfb1d, 0xfb3e,
          0x10808, 0x1083c, 0x1093f, 0x10a00,
      );
      $rtlRanges = array(
          array(0xfb1f, 0xfb28),
          array(0xfb2a, 0xfb36),
          array(0xfb38, 0xfb3c),
          array(0xfb40, 0xfb41),
          array(0xfb43, 0xfb44),
          array(0xfb46, 0xfbc1),
          array(0xfbd3, 0xfd3d),
          array(0xfd50, 0xfd8f),
          array(0xfd92, 0xfdc7),
          array(0xfdf0, 0xfdfc),
          array(0xfe70, 0xfe74),
          array(0xfe76, 0xfefc),
          array(0x10800, 0x10805),
          array(0x1080a, 0x10835),
          array(0x10837, 0x10838),
          array(0x1083f, 0x10855),
          array(0x10857, 0x1085f),
          array(0x10900, 0x1091b),
          array(0x10920, 0x10939),
          array(0x10a10, 0x10a13),
          array(0x10a15, 0x10a17),
          array(0x10a19, 0x10a33),
          array(0x10a40, 0x10a47),
          array(0x10a50, 0x10a58),
          array(0x10a60, 0x10a7f),
          array(0x10b00, 0x10b35),
          array(0x10b40, 0x10b55),
          array(0x10b58, 0x10b72),
          array(0x10b78, 0x10b7f),
      );

    } else {

      // between the two blocks only 0x200f (handled above) is right-to-left
      return 'LTR';

    }

    // single code points are matched strictly (type and value) ...
    if (in_array($c, $rtlSingles, true)) {
      return 'RTL';
    }

    // ... ranges are matched inclusively on both ends
    foreach ($rtlRanges as $range) {
      if ($range[0] <= $c && $range[1] >= $c) {
        return 'RTL';
      }
    }

    return 'LTR';
  }
2000 1
2001
  /**
2002
   * get data from "/data/*.php"
2003
   *
2004
   * @param string $file
2005
   *
2006
   * @return bool|string|array|int <p>Will return false on error.</p>
2007
   */
2008
  private static function getData($file)
  {
    // NOTE: $file is re-assigned on purpose instead of introducing a new
    // local, because the required file is executed in this method's scope.
    $file = __DIR__ . '/data/' . $file . '.php';

    if (!file_exists($file)) {
      return false;
    }

    // the return value of the data file is handed straight to the caller
    /** @noinspection PhpIncludeInspection */
    return require $file;
  }
2018 1
2019
  /**
2020
   * Check for php-support.
2021
   *
2022
   * @param string|null $key
2023
   *
2024
   * @return mixed <p>Return the full support-"array", if $key === null<br>
2025
   *               return bool-value, if $key is used and available<br>
2026
   *               otherwise return null</p>
2027
   */
2028
  public static function getSupportInfo($key = null)
  {
    // run the feature detection once, on first use
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // no key requested: hand out the complete support-"array"
    if ($key === null) {
      return self::$SUPPORT;
    }

    // unknown (or null-valued) keys result in null
    return isset(self::$SUPPORT[$key]) ? self::$SUPPORT[$key] : null;
  }
2044 5
2045
  /**
2046
   * alias for "UTF8::string_has_bom()"
2047
   *
2048
   * @see UTF8::string_has_bom()
2049
   *
2050
   * @param string $str
2051
   *
2052
   * @return bool
2053
   *
2054
   * @deprecated <p>use "UTF8::string_has_bom()"</p>
2055
   */
2056
  public static function hasBom($str)
  {
    // plain alias: the work is done by string_has_bom()
    $hasBom = self::string_has_bom($str);

    return $hasBom;
  }
2060
2061
  /**
2062
   * Converts a hexadecimal-value into an UTF-8 character.
2063
   *
2064
   * @param string $hexdec <p>The hexadecimal value.</p>
2065
   *
2066
   * @return string|false <p>One single UTF-8 character.</p>
2067
   */
2068
  public static function hex_to_chr($hexdec)
  {
    // hexadecimal string -> number -> character
    $decimal = hexdec($hexdec);

    return self::decimal_to_chr($decimal);
  }
2072 2
2073
  /**
2074
   * Converts hexadecimal U+xxxx code point representation to integer.
2075
   *
2076
   * INFO: opposite to UTF8::int_to_hex()
2077
   *
2078
   * @param string $hexDec <p>The hexadecimal code point representation.</p>
2079
   *
2080
   * @return int|false <p>The code point, or false on failure.</p>
2081
   */
2082
  public static function hex_to_int($hexDec)
  {
    $hexDec = (string)$hexDec;

    // an empty input can never be a code point
    if ($hexDec === '') {
      return false;
    }

    // Accepted forms: an optional "\u" or "U+" prefix followed by 4 to 6
    // hexadecimal digits (case-insensitive).
    //
    // Only real hex digits (0-9, a-f) are allowed: intval(..., 16) silently
    // stops at the first non-hex character, so e.g. "zzzz" would otherwise
    // be reported as code point 0 instead of as a failure.
    if (preg_match('/^(?:\\\u|U\+|)([a-f0-9]{4,6})$/i', $hexDec, $match)) {
      return intval($match[1], 16);
    }

    return false;
  }
2096 1
2097
  /**
2098
   * alias for "UTF8::html_entity_decode()"
2099
   *
2100
   * @see UTF8::html_entity_decode()
2101
   *
2102
   * @param string $str
2103
   * @param int    $flags
2104
   * @param string $encoding
2105
   *
2106
   * @return string
2107
   */
2108
  public static function html_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // plain alias: all arguments are passed through unchanged
    $decoded = self::html_entity_decode($str, $flags, $encoding);

    return $decoded;
  }
2112 1
2113
  /**
2114
   * Converts a UTF-8 string to a series of HTML numbered entities.
2115
   *
2116
   * INFO: opposite to UTF8::html_decode()
2117
   *
2118
   * @param string $str            <p>The Unicode string to be encoded as numbered entities.</p>
2119
   * @param bool   $keepAsciiChars [optional] <p>Keep ASCII chars.</p>
2120
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
2121
   *
2122
   * @return string <p>HTML numbered entities.</p>
2123
   */
2124
  public static function html_encode($str, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    # INFO: http://stackoverflow.com/questions/35854535/better-explanation-of-convmap-in-mb-encode-numericentity
    if (function_exists('mb_encode_numericentity')) {

      // When ASCII chars are to be kept, the converted range starts at 0x80
      // instead of 0x00.
      $startCode = ($keepAsciiChars === true) ? 0x80 : 0x00;

      // The conversion map is a flat list of 4-tuples:
      // (start code, end code, offset, mask).
      // It has to contain a multiple of four elements - PHP >= 8.0 rejects
      // any other size with a ValueError - so exactly one tuple is passed.
      return mb_encode_numericentity(
          $str,
          array($startCode, 0xfffff, 0, 0xfffff),
          $encoding
      );
    }

    // Fallback without mbstring: encode the string character by character.
    return implode(
        '',
        array_map(
            function ($data) use ($keepAsciiChars, $encoding) {
              return self::single_chr_html_encode($data, $keepAsciiChars, $encoding);
            },
            self::split($str)
        )
    );
  }
2162
2163
  /**
2164
   * UTF-8 version of html_entity_decode()
2165
   *
2166
   * The reason we are not using html_entity_decode() by itself is because
2167
   * while it is not technically correct to leave out the semicolon
2168
   * at the end of an entity most browsers will still interpret the entity
2169
   * correctly. html_entity_decode() does not convert entities without
2170
   * semicolons, so we are left with our own little solution here. Bummer.
2171
   *
2172
   * Convert all HTML entities to their applicable characters
2173
   *
2174
   * INFO: opposite to UTF8::html_encode()
2175
   *
2176
   * @link http://php.net/manual/en/function.html-entity-decode.php
2177
   *
2178
   * @param string $str      <p>
2179
   *                         The input string.
2180
   *                         </p>
2181
   * @param int    $flags    [optional] <p>
2182
   *                         A bitmask of one or more of the following flags, which specify how to handle quotes and
2183
   *                         which document type to use. The default is ENT_COMPAT | ENT_HTML401.
2184
   *                         <table>
2185
   *                         Available <i>flags</i> constants
2186
   *                         <tr valign="top">
2187
   *                         <td>Constant Name</td>
2188
   *                         <td>Description</td>
2189
   *                         </tr>
2190
   *                         <tr valign="top">
2191
   *                         <td><b>ENT_COMPAT</b></td>
2192
   *                         <td>Will convert double-quotes and leave single-quotes alone.</td>
2193
   *                         </tr>
2194
   *                         <tr valign="top">
2195
   *                         <td><b>ENT_QUOTES</b></td>
2196
   *                         <td>Will convert both double and single quotes.</td>
2197
   *                         </tr>
2198
   *                         <tr valign="top">
2199
   *                         <td><b>ENT_NOQUOTES</b></td>
2200
   *                         <td>Will leave both double and single quotes unconverted.</td>
2201
   *                         </tr>
2202
   *                         <tr valign="top">
2203
   *                         <td><b>ENT_HTML401</b></td>
2204
   *                         <td>
2205
   *                         Handle code as HTML 4.01.
2206
   *                         </td>
2207
   *                         </tr>
2208
   *                         <tr valign="top">
2209
   *                         <td><b>ENT_XML1</b></td>
2210
   *                         <td>
2211
   *                         Handle code as XML 1.
2212
   *                         </td>
2213
   *                         </tr>
2214
   *                         <tr valign="top">
2215
   *                         <td><b>ENT_XHTML</b></td>
2216
   *                         <td>
2217
   *                         Handle code as XHTML.
2218
   *                         </td>
2219
   *                         </tr>
2220
   *                         <tr valign="top">
2221
   *                         <td><b>ENT_HTML5</b></td>
2222
   *                         <td>
2223
   *                         Handle code as HTML 5.
2224
   *                         </td>
2225
   *                         </tr>
2226
   *                         </table>
2227
   *                         </p>
2228
   * @param string $encoding [optional] <p>Encoding to use.</p>
2229
   *
2230
   * @return string <p>The decoded string.</p>
2231
   */
2232
  public static function html_entity_decode($str, $flags = null, $encoding = 'UTF-8')
  {
    // init
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // shorter than 4 bytes: returned untouched
    if (!isset($str[3])) { // examples: &; || &x;
      return $str;
    }

    // Fast exit: without any "&", or with neither "&#" nor ";", the string
    // is returned as it is.
    if (
        strpos($str, '&') === false
        ||
        (
            strpos($str, '&#') === false
            &&
            strpos($str, ';') === false
        )
    ) {
      return $str;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($flags === null) {
      if (Bootup::is_php('5.4') === true) {
        $flags = ENT_QUOTES | ENT_HTML5;
      } else {
        $flags = ENT_QUOTES;
      }
    }

    if (
        $encoding !== 'UTF-8'
        &&
        $encoding !== 'WINDOWS-1252'
    ) {
      // Make sure the support-"array" is populated before it is read, the
      // same way the other methods of this class do it.
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (self::$SUPPORT['mbstring'] === false) {
        trigger_error('UTF8::html_entity_decode() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
      }
    }

    // Repeat the decoding until a full pass does not change the string
    // anymore, so that multiple-encoded entities are resolved as well.
    do {
      $str_compare = $str;

      // Decimal entities ("&#NN;"): a result that is a plain double or
      // single quote is not taken over here, the entity is kept for the
      // html_entity_decode() call below.
      $str = preg_replace_callback(
          "/&#\d{2,6};/",
          function ($matches) use ($encoding) {
            $returnTmp = \mb_convert_encoding($matches[0], $encoding, 'HTML-ENTITIES');

            if ($returnTmp !== '"' && $returnTmp !== "'") {
              return $returnTmp;
            }

            return $matches[0];
          },
          $str
      );

      // decode numeric & UTF16 two byte entities
      // (the inner preg_replace() appends the missing ";" to numeric
      // entities that were written without one)
      $str = html_entity_decode(
          preg_replace('/(&#(?:x0*[0-9a-f]{2,6}(?![0-9a-f;])|(?:0*\d{2,6}(?![0-9;]))))/iS', '$1;', $str),
          $flags,
          $encoding
      );

    } while ($str_compare !== $str);

    return $str;
  }
2307
2308
  /**
2309
   * Convert all applicable characters to HTML entities: UTF-8 version of htmlentities()
2310
   *
2311
   * @link http://php.net/manual/en/function.htmlentities.php
2312
   *
2313
   * @param string $str           <p>
2314
   *                              The input string.
2315
   *                              </p>
2316
   * @param int    $flags         [optional] <p>
2317
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2318
   *                              invalid code unit sequences and the used document type. The default is
2319
   *                              ENT_COMPAT | ENT_HTML401.
2320
   *                              <table>
2321
   *                              Available <i>flags</i> constants
2322
   *                              <tr valign="top">
2323
   *                              <td>Constant Name</td>
2324
   *                              <td>Description</td>
2325
   *                              </tr>
2326
   *                              <tr valign="top">
2327
   *                              <td><b>ENT_COMPAT</b></td>
2328
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2329
   *                              </tr>
2330
   *                              <tr valign="top">
2331
   *                              <td><b>ENT_QUOTES</b></td>
2332
   *                              <td>Will convert both double and single quotes.</td>
2333
   *                              </tr>
2334
   *                              <tr valign="top">
2335
   *                              <td><b>ENT_NOQUOTES</b></td>
2336
   *                              <td>Will leave both double and single quotes unconverted.</td>
2337
   *                              </tr>
2338
   *                              <tr valign="top">
2339
   *                              <td><b>ENT_IGNORE</b></td>
2340
   *                              <td>
2341
   *                              Silently discard invalid code unit sequences instead of returning
2342
   *                              an empty string. Using this flag is discouraged as it
2343
   *                              may have security implications.
2344
   *                              </td>
2345
   *                              </tr>
2346
   *                              <tr valign="top">
2347
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2348
   *                              <td>
2349
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2350
   *                              U+FFFD (UTF-8) or &#38;#38;#FFFD; (otherwise) instead of returning an empty string.
2351
   *                              </td>
2352
   *                              </tr>
2353
   *                              <tr valign="top">
2354
   *                              <td><b>ENT_DISALLOWED</b></td>
2355
   *                              <td>
2356
   *                              Replace invalid code points for the given document type with a
2357
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#38;#38;#FFFD;
2358
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2359
   *                              instance, to ensure the well-formedness of XML documents with
2360
   *                              embedded external content.
2361
   *                              </td>
2362
   *                              </tr>
2363
   *                              <tr valign="top">
2364
   *                              <td><b>ENT_HTML401</b></td>
2365
   *                              <td>
2366
   *                              Handle code as HTML 4.01.
2367
   *                              </td>
2368
   *                              </tr>
2369
   *                              <tr valign="top">
2370
   *                              <td><b>ENT_XML1</b></td>
2371
   *                              <td>
2372
   *                              Handle code as XML 1.
2373
   *                              </td>
2374
   *                              </tr>
2375
   *                              <tr valign="top">
2376
   *                              <td><b>ENT_XHTML</b></td>
2377
   *                              <td>
2378
   *                              Handle code as XHTML.
2379
   *                              </td>
2380
   *                              </tr>
2381
   *                              <tr valign="top">
2382
   *                              <td><b>ENT_HTML5</b></td>
2383
   *                              <td>
2384
   *                              Handle code as HTML 5.
2385
   *                              </td>
2386
   *                              </tr>
2387
   *                              </table>
2388
   *                              </p>
2389
   * @param string $encoding      [optional] <p>
2390
   *                              Like <b>htmlspecialchars</b>,
2391
   *                              <b>htmlentities</b> takes an optional third argument
2392
   *                              <i>encoding</i> which defines encoding used in
2393
   *                              conversion.
2394
   *                              Although this argument is technically optional, you are highly
2395
   *                              encouraged to specify the correct value for your code.
2396
   *                              </p>
2397
   * @param bool   $double_encode [optional] <p>
2398
   *                              When <i>double_encode</i> is turned off PHP will not
2399
   *                              encode existing html entities. The default is to convert everything.
2400
   *                              </p>
2401
   *
2402
   *
2403 2
   * @return string the encoded string.
2404
   * </p>
2405 2
   * <p>
2406 1
   * If the input <i>string</i> contains an invalid code unit
2407 1
   * sequence within the given <i>encoding</i> an empty string
2408
   * will be returned, unless either the <b>ENT_IGNORE</b> or
2409 2
   * <b>ENT_SUBSTITUTE</b> flags are set.
2410
   */
2411
  public static function htmlentities($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    $encoded = htmlentities($str, $flags, $encoding, $double_encode);

    /**
     * PHP doesn't replace a backslash with its html entity, since a backslash
     * is mostly used to escape characters when inserting into a database.
     * That kind of escaping is not needed here, so every backslash is
     * replaced by its html entity equivalent.
     *
     * https://github.com/forkcms/library/blob/master/spoon/filter/filter.php#L303
     */
    $encoded = str_replace('\\', '&#92;', $encoded);

    // for every other encoding the work is done at this point
    if ($encoding !== 'UTF-8') {
      return $encoded;
    }

    // Collect each distinct character of 3 or more bytes that is still in
    // the string, together with its numbered entity.
    $search = array();
    $replacements = array();
    foreach (self::chr_size_list($encoded) as $position => $byteLength) {
      if ($byteLength < 3) {
        continue;
      }

      $char = self::access($encoded, $position);

      // every character is only encoded once
      if (isset($replacements[$char])) {
        continue;
      }

      $search[$char] = $char;
      $replacements[$char] = self::html_encode($char);
    }

    return str_replace($search, $replacements, $encoded);
  }
2449
2450
  /**
2451
   * Convert only special characters to HTML entities: UTF-8 version of htmlspecialchars()
2452
   *
2453
   * INFO: Take a look at "UTF8::htmlentities()"
2454
   *
2455
   * @link http://php.net/manual/en/function.htmlspecialchars.php
2456
   *
2457
   * @param string $str           <p>
2458
   *                              The string being converted.
2459
   *                              </p>
2460
   * @param int    $flags         [optional] <p>
2461
   *                              A bitmask of one or more of the following flags, which specify how to handle quotes,
2462
   *                              invalid code unit sequences and the used document type. The default is
2463
   *                              ENT_COMPAT | ENT_HTML401.
2464
   *                              <table>
2465
   *                              Available <i>flags</i> constants
2466
   *                              <tr valign="top">
2467
   *                              <td>Constant Name</td>
2468
   *                              <td>Description</td>
2469
   *                              </tr>
2470
   *                              <tr valign="top">
2471
   *                              <td><b>ENT_COMPAT</b></td>
2472
   *                              <td>Will convert double-quotes and leave single-quotes alone.</td>
2473
   *                              </tr>
2474
   *                              <tr valign="top">
2475
   *                              <td><b>ENT_QUOTES</b></td>
2476
   *                              <td>Will convert both double and single quotes.</td>
2477
   *                              </tr>
2478
   *                              <tr valign="top">
2479
   *                              <td><b>ENT_NOQUOTES</b></td>
2480
   *                              <td>Will leave both double and single quotes unconverted.</td>
2481
   *                              </tr>
2482
   *                              <tr valign="top">
2483
   *                              <td><b>ENT_IGNORE</b></td>
2484
   *                              <td>
2485
   *                              Silently discard invalid code unit sequences instead of returning
2486
   *                              an empty string. Using this flag is discouraged as it
2487
   *                              may have security implications.
2488
   *                              </td>
2489
   *                              </tr>
2490
   *                              <tr valign="top">
2491
   *                              <td><b>ENT_SUBSTITUTE</b></td>
2492
   *                              <td>
2493
   *                              Replace invalid code unit sequences with a Unicode Replacement Character
2494
   *                              U+FFFD (UTF-8) or &#xFFFD; (otherwise) instead of returning an empty string.
2495
   *                              </td>
2496
   *                              </tr>
2497
   *                              <tr valign="top">
2498
   *                              <td><b>ENT_DISALLOWED</b></td>
2499
   *                              <td>
2500
   *                              Replace invalid code points for the given document type with a
2501
   *                              Unicode Replacement Character U+FFFD (UTF-8) or &#xFFFD;
2502
   *                              (otherwise) instead of leaving them as is. This may be useful, for
2503
   *                              instance, to ensure the well-formedness of XML documents with
2504
   *                              embedded external content.
2505
   *                              </td>
2506
   *                              </tr>
2507
   *                              <tr valign="top">
2508
   *                              <td><b>ENT_HTML401</b></td>
2509
   *                              <td>
2510
   *                              Handle code as HTML 4.01.
2511
   *                              </td>
2512
   *                              </tr>
2513
   *                              <tr valign="top">
2514
   *                              <td><b>ENT_XML1</b></td>
2515
   *                              <td>
2516
   *                              Handle code as XML 1.
2517
   *                              </td>
2518
   *                              </tr>
2519
   *                              <tr valign="top">
2520
   *                              <td><b>ENT_XHTML</b></td>
2521
   *                              <td>
2522
   *                              Handle code as XHTML.
2523
   *                              </td>
2524
   *                              </tr>
2525
   *                              <tr valign="top">
2526
   *                              <td><b>ENT_HTML5</b></td>
2527
   *                              <td>
2528
   *                              Handle code as HTML 5.
2529
   *                              </td>
2530
   *                              </tr>
2531
   *                              </table>
2532
   *                              </p>
2533
   * @param string $encoding      [optional] <p>
2534
   *                              Defines encoding used in conversion.
2535
   *                              </p>
2536
   *                              <p>
2537
   *                              For the purposes of this function, the encodings
2538
   *                              ISO-8859-1, ISO-8859-15,
2539
   *                              UTF-8, cp866,
2540
   *                              cp1251, cp1252, and
2541
   *                              KOI8-R are effectively equivalent, provided the
2542
   *                              <i>string</i> itself is valid for the encoding, as
2543
   *                              the characters affected by <b>htmlspecialchars</b> occupy
2544
   *                              the same positions in all of these encodings.
2545
   *                              </p>
2546
   * @param bool   $double_encode [optional] <p>
2547
   *                              When <i>double_encode</i> is turned off PHP will not
2548
   *                              encode existing html entities, the default is to convert everything.
2549
   *                              </p>
2550
   *
2551 1
   * @return string <p>The converted string.</p>
   *                <p>
   *                If the input <i>string</i> contains an invalid code unit
   *                sequence within the given <i>encoding</i> an empty string
   *                will be returned, unless either the <b>ENT_IGNORE</b> or
   *                <b>ENT_SUBSTITUTE</b> flags are set.
   *                </p>
2558
   */
2559
  public static function htmlspecialchars($str, $flags = ENT_COMPAT, $encoding = 'UTF-8', $double_encode = true)
  {
    // The exact label "UTF-8" is used as-is; every other label is passed
    // through "normalize_encoding()" (with "UTF-8" as its second argument).
    $charset = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    return htmlspecialchars($str, $flags, $charset, $double_encode);
  }
2567 1
2568
  /**
   * Checks whether iconv is available on the server.
   *
   * <p>
   * Side effect: on PHP < 5.6, and only if the extension is loaded, the iconv
   * input-, output- and internal-encoding are set to "UTF-8".
   * </p>
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function iconv_loaded()
  {
    $loaded = extension_loaded('iconv');

    // INFO: "default_charset" is already set by the "Bootup"-class

    // Only configure iconv when it really exists: calling "iconv_set_encoding()"
    // without the extension would abort with an "undefined function" fatal error.
    if ($loaded === true && Bootup::is_php('5.6') === false) {
      // INFO: "iconv_set_encoding" is deprecated since PHP >= 5.6
      iconv_set_encoding('input_encoding', 'UTF-8');
      iconv_set_encoding('output_encoding', 'UTF-8');
      iconv_set_encoding('internal_encoding', 'UTF-8');
    }

    return $loaded;
  }
2588
2589
  /**
   * Alias for "UTF8::decimal_to_chr()": the argument is forwarded unchanged
   * and the result is returned as-is.
   *
   * @see UTF8::decimal_to_chr()
   *
   * @param mixed $int
   *
   * @return string
   */
  public static function int_to_chr($int)
  {
    $chr = self::decimal_to_chr($int);

    return $chr;
  }
2602
2603
  /**
   * Converts an integer to its hexadecimal code point representation (e.g. "U+0041").
   *
   * INFO: opposite to UTF8::hex_to_int()
   *
   * @param int    $int  <p>The integer to be converted to hexadecimal code point.</p>
   * @param string $pfix [optional] <p>Prefix that is put in front of the hex digits.</p>
   *
   * @return string <p>The code point, or empty string if $int is not a real integer.</p>
   */
  public static function int_to_hex($int, $pfix = 'U+')
  {
    // Reject everything that is not a genuine integer (numeric strings, floats, ...).
    if ((int)$int !== $int) {
      return '';
    }

    // Left-pad with zeros to at least four hex digits; longer values stay untouched.
    $digits = str_pad(dechex($int), 4, '0', STR_PAD_LEFT);

    return $pfix . $digits;
  }
2625
2626 1
  /**
   * Checks whether intl-char is available on the server, i.e. whether
   * "Bootup::is_php('7.0')" is true and the "IntlChar" class exists.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intlChar_loaded()
  {
    if (Bootup::is_php('7.0') !== true) {
      return false;
    }

    return class_exists('IntlChar') === true;
  }
2639 4
2640
  /**
   * Checks whether the "intl" extension is available on the server.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function intl_loaded()
  {
    return extension_loaded('intl') === true;
  }
2649
2650
  /**
   * Alias for "UTF8::is_ascii()": forwards $str and returns the result unchanged.
   *
   * @see UTF8::is_ascii()
   *
   * @param string $str
   *
   * @return boolean
   *
   * @deprecated <p>use "UTF8::is_ascii()"</p>
   */
  public static function isAscii($str)
  {
    $result = self::is_ascii($str);

    return $result;
  }
2665
2666
  /**
   * Alias for "UTF8::is_base64()": forwards $str and returns the result unchanged.
   *
   * @see UTF8::is_base64()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_base64()"</p>
   */
  public static function isBase64($str)
  {
    $result = self::is_base64($str);

    return $result;
  }
2681
2682
  /**
   * Alias for "UTF8::is_binary()": forwards $str and returns the result unchanged.
   *
   * @see UTF8::is_binary()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_binary()"</p>
   */
  public static function isBinary($str)
  {
    $result = self::is_binary($str);

    return $result;
  }
2697
2698
  /**
   * Alias for "UTF8::is_bom()": forwards $utf8_chr and returns the result unchanged.
   *
   * @see UTF8::is_bom()
   *
   * @param string $utf8_chr
   *
   * @return boolean
   *
   * @deprecated <p>use "UTF8::is_bom()"</p>
   */
  public static function isBom($utf8_chr)
  {
    $result = self::is_bom($utf8_chr);

    return $result;
  }
2713
2714
  /**
   * Alias for "UTF8::is_html()": forwards $str and returns the result unchanged.
   *
   * @see UTF8::is_html()
   *
   * @param string $str
   *
   * @return boolean
   *
   * @deprecated <p>use "UTF8::is_html()"</p>
   */
  public static function isHtml($str)
  {
    $result = self::is_html($str);

    return $result;
  }
2729
2730
  /**
   * Alias for "UTF8::is_json()": forwards $str and returns the result unchanged.
   *
   * @see UTF8::is_json()
   *
   * @param string $str
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_json()"</p>
   */
  public static function isJson($str)
  {
    $result = self::is_json($str);

    return $result;
  }
2745
2746
  /**
   * Alias for "UTF8::is_utf16()": forwards $str and returns the result unchanged.
   *
   * @see UTF8::is_utf16()
   *
   * @param string $str
   *
   * @return int|false <p>false if it's not UTF-16, 1 for UTF-16LE, 2 for UTF-16BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf16()"</p>
   */
  public static function isUtf16($str)
  {
    $result = self::is_utf16($str);

    return $result;
  }
2761
2762
  /**
   * Alias for "UTF8::is_utf32()": forwards $str and returns the result unchanged.
   *
   * @see UTF8::is_utf32()
   *
   * @param string $str
   *
   * @return int|false <p>false if it's not UTF-32, 1 for UTF-32LE, 2 for UTF-32BE.</p>
   *
   * @deprecated <p>use "UTF8::is_utf32()"</p>
   */
  public static function isUtf32($str)
  {
    $result = self::is_utf32($str);

    return $result;
  }
2777
2778
  /**
   * Alias for "UTF8::is_utf8()": forwards both arguments and returns the result unchanged.
   *
   * @see UTF8::is_utf8()
   *
   * @param string $str
   * @param bool   $strict
   *
   * @return bool
   *
   * @deprecated <p>use "UTF8::is_utf8()"</p>
   */
  public static function isUtf8($str, $strict = false)
  {
    $result = self::is_utf8($str, $strict);

    return $result;
  }
2794
2795
  /**
   * Checks if a string consists only of the accepted ASCII bytes.
   *
   * <p>
   * Accepted are the printable range 0x20-0x7E plus the bytes 0x09, 0x0A, 0x0D,
   * 0x10 and 0x13; any other byte (including NUL) makes the check fail.
   * </p>
   *
   * @param string $str <p>The string to check.</p>
   *
   * @return bool <p>
   *              <strong>true</strong> if it is ASCII (an empty string counts as ASCII)<br>
   *              <strong>false</strong> otherwise
   *              </p>
   */
  public static function is_ascii($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return true;
    }

    // NOTE(review): "\x10" and "\x13" look like decimal 10/13 (LF/CR) written as hex;
    // they are kept because callers may rely on the current result - TODO confirm.
    $foundForeignByte = preg_match('/[^\x09\x10\x13\x0A\x0D\x20-\x7E]/', $str);

    // Only a positive match (1) disqualifies the string; 0 and false both count as ASCII.
    return $foundForeignByte !== 1;
  }
2815 1
2816
  /**
   * Returns true if the string is base64 encoded, false otherwise.
   *
   * <p>
   * The input must decode in strict mode to a non-empty payload, and re-encoding
   * that payload must reproduce the input byte for byte.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p>Whether or not $str is base64 encoded.</p>
   */
  public static function is_base64($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // Strict mode: "base64_decode()" returns false for characters outside the base64 alphabet.
    $decoded = base64_decode($str, true);

    // Compare against false / '' explicitly: a plain truthiness check would wrongly
    // reject payloads such as "0" (encoded as "MA==").
    if ($decoded === false || $decoded === '') {
      return false;
    }

    return base64_encode($decoded) === $str;
  }
2838 16
2839
  /**
   * Check if the input looks like binary data (heuristic).
   *
   * <p>
   * The input is treated as binary when it consists solely of the characters
   * "0" and "1", or when it contains at least one NUL byte.
   * </p>
   *
   * @param mixed $input <p>Is cast to string before it is checked.</p>
   *
   * @return bool <p>Always <strong>false</strong> for an empty string.</p>
   */
  public static function is_binary($input)
  {
    $bytes = (string)$input;

    if ($bytes === '') {
      return false;
    }

    // a string made up only of "0" and "1" is considered a bit-string
    if (preg_match('~^[01]+$~', $bytes) === 1) {
      return true;
    }

    // A single NUL byte is enough; this also covers every input in which
    // NUL bytes make up more than 30% of the content.
    return substr_count($bytes, "\x00") > 0;
  }
2869
2870
  /**
   * Check if the file is binary.
   *
   * <p>
   * Reads at most the first 512 bytes of the file and passes them to "UTF8::is_binary()".
   * A file that cannot be opened or read is checked as an empty string.
   * </p>
   *
   * @param string $file <p>Path of the file to inspect.</p>
   *
   * @return boolean
   */
  public static function is_binary_file($file)
  {
    $block = '';
    $fp = false;

    try {
      $fp = fopen($file, 'rb');

      // "fopen()" signals failure by returning false (it does not throw),
      // so the handle must be checked before it is read from or closed.
      if ($fp !== false) {
        $read = fread($fp, 512);
        fclose($fp);
        $fp = false;

        if ($read !== false) {
          $block = $read;
        }
      }
    } catch (\Exception $e) {
      // reached e.g. when an error handler converts warnings into exceptions
      if (is_resource($fp)) {
        fclose($fp);
      }

      $block = '';
    }

    return self::is_binary($block);
  }
2889
2890
  /**
   * Checks if the given string is identical to one of the keys of "self::$BOM"
   * (the known "Byte Order Mark" strings).
   *
   * WARNING: Use "UTF8::string_has_bom()" if you will check BOM in a string.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if $str is a Byte Order Mark, <strong>false</strong> otherwise.</p>
   */
  public static function is_bom($str)
  {
    // strict lookup: the whole input must be identical to one of the BOM keys
    return in_array($str, array_keys(self::$BOM), true);
  }
2909 1
2910
  /**
   * Check if the string contains at least one html-tag, e.g. "<lall>".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return boolean <p>Always <strong>false</strong> for an empty string.</p>
   */
  public static function is_html($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    // opening or closing tag with optional attributes and optional self-closing slash
    $tagPattern = "/<\/?\w+(?:(?:\s+\w+(?:\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)*+\s*|\s*)\/?>/";

    // 1 means "tag found"; 0 (no match) and false (pcre error) both mean "no html"
    return preg_match($tagPattern, $str) === 1;
  }
2936 1
2937
  /**
   * Try to check if "$str" is a json-string.
   *
   * <p>
   * Only input that decodes without error into an object or an array is accepted;
   * scalar JSON values are therefore reported as <strong>false</strong>.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool
   */
  public static function is_json($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return false;
    }

    $decoded = self::json_decode($str);

    if (json_last_error() !== JSON_ERROR_NONE) {
      return false;
    }

    return is_object($decoded) === true || is_array($decoded) === true;
  }
2968
2969
  /**
   * Check if the string is UTF-16.
   *
   * <p>
   * After the BOM is removed, only input classified as binary by "UTF8::is_binary()"
   * is examined. For UTF-16LE and UTF-16BE the input is converted to UTF-8 and
   * round-tripped; if the round-trip is stable, a score is counted from the
   * "count_chars()" results. The byte order with the higher score wins.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it's not UTF-16 (or both scores are equal),<br>
   *                   <strong>1</strong> for UTF-16LE,<br>
   *                   <strong>2</strong> for UTF-16BE.
   *                   </p>
   */
  public static function is_utf16($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    // little-endian is evaluated first, big-endian second
    $score = array();
    foreach (array('UTF-16LE', 'UTF-16BE') as $byteOrder) {
      $score[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // the conversion has to survive a full round-trip unchanged
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    if ($score['UTF-16BE'] === $score['UTF-16LE']) {
      return false;
    }

    return ($score['UTF-16LE'] > $score['UTF-16BE']) ? 1 : 2;
  }
3028
3029
  /**
   * Check if the string is UTF-32.
   *
   * <p>
   * After the BOM is removed, only input classified as binary by "UTF8::is_binary()"
   * is examined. For UTF-32LE and UTF-32BE the input is converted to UTF-8 and
   * round-tripped; if the round-trip is stable, a score is counted from the
   * "count_chars()" results. The byte order with the higher score wins.
   * </p>
   *
   * @param string $str <p>The input string.</p>
   *
   * @return int|false <p>
   *                   <strong>false</strong> if it's not UTF-32 (or both scores are equal),<br>
   *                   <strong>1</strong> for UTF-32LE,<br>
   *                   <strong>2</strong> for UTF-32BE.
   *                   </p>
   */
  public static function is_utf32($str)
  {
    $str = self::remove_bom($str);

    if (self::is_binary($str) !== true) {
      return false;
    }

    // little-endian is evaluated first, big-endian second
    $score = array();
    foreach (array('UTF-32LE', 'UTF-32BE') as $byteOrder) {
      $score[$byteOrder] = 0;

      $asUtf8 = \mb_convert_encoding($str, 'UTF-8', $byteOrder);
      if (!$asUtf8) {
        continue;
      }

      // the conversion has to survive a full round-trip unchanged
      $backAgain = \mb_convert_encoding($asUtf8, $byteOrder, 'UTF-8');
      $roundTrip = \mb_convert_encoding($backAgain, 'UTF-8', $byteOrder);
      if ($roundTrip !== $asUtf8) {
        continue;
      }

      $inputChars = self::count_chars($str, true);
      foreach (self::count_chars($roundTrip, true) as $char => $unused) {
        if (in_array($char, $inputChars, true) === true) {
          $score[$byteOrder]++;
        }
      }
    }

    if ($score['UTF-32BE'] === $score['UTF-32LE']) {
      return false;
    }

    return ($score['UTF-32LE'] > $score['UTF-32BE']) ? 1 : 2;
  }
3088
3089
  /**
   * Checks whether the passed string contains only byte sequences that appear valid UTF-8 characters.
   *
   * <p>
   * The bytes are validated with a small state machine: over-long (non-shortest) forms,
   * surrogates, code points above U+10FFFF, 5- and 6-octet sequences, stray continuation
   * octets and sequences that are cut off (also at the very end of the string) are rejected.
   * </p>
   *
   * @see    http://hsivonen.iki.fi/php-utf8/
   *
   * @param string $str    <p>The string to be checked.</p>
   * @param bool   $strict <p>Check also if the string is not UTF-16 or UTF-32.</p>
   *
   * @return bool <p>An empty string is reported as valid.</p>
   */
  public static function is_utf8($str, $strict = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return true;
    }

    if ($strict === true) {
      if (self::is_utf16($str) !== false) {
        return false;
      }

      if (self::is_utf32($str) !== false) {
        return false;
      }
    }

    // NOTE(review): this shortcut is taken when "pcre_utf8_support()" is NOT true,
    // although it relies on the /u modifier - confirm that the condition is intended.
    if (self::pcre_utf8_support() !== true) {

      // If even just the first character can be matched, when the /u
      // modifier is used, then it's valid UTF-8. If the UTF-8 is somehow
      // invalid, nothing at all will match, even if the string contains
      // some valid sequences
      return (preg_match('/^.{1}/us', $str) === 1);
    }

    $mState = 0; // cached expected number of octets after the current octet
    // until the beginning of the next UTF8 character sequence
    $mUcs4 = 0; // cached Unicode character
    $mBytes = 1; // cached expected number of octets in the current sequence

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length is needed here, so bypass an overloaded "strlen()"
    if (self::$SUPPORT['mbstring_func_overload'] === true) {
      $len = \mb_strlen($str, '8BIT');
    } else {
      $len = strlen($str);
    }

    /** @noinspection ForeachInvariantsInspection */
    for ($i = 0; $i < $len; $i++) {
      $in = ord($str[$i]);
      if ($mState === 0) {
        // When mState is zero we expect either a US-ASCII character or a
        // multi-octet sequence.
        if (0 === (0x80 & $in)) {
          // US-ASCII, pass straight through.
          $mBytes = 1;
        } elseif (0xC0 === (0xE0 & $in)) {
          // First octet of 2 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x1F) << 6;
          $mState = 1;
          $mBytes = 2;
        } elseif (0xE0 === (0xF0 & $in)) {
          // First octet of 3 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x0F) << 12;
          $mState = 2;
          $mBytes = 3;
        } elseif (0xF0 === (0xF8 & $in)) {
          // First octet of 4 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x07) << 18;
          $mState = 3;
          $mBytes = 4;
        } elseif (0xF8 === (0xFC & $in)) {
          /* First octet of 5 octet sequence.
          *
          * This is illegal because the encoded codepoint must be either
          * (a) not the shortest form or
          * (b) outside the Unicode range of 0-0x10FFFF.
          * Rather than trying to resynchronize, we will carry on until the end
          * of the sequence and let the later error handling code catch it.
          */
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 0x03) << 24;
          $mState = 4;
          $mBytes = 5;
        } elseif (0xFC === (0xFE & $in)) {
          // First octet of 6 octet sequence, see comments for 5 octet sequence.
          $mUcs4 = $in;
          $mUcs4 = ($mUcs4 & 1) << 30;
          $mState = 5;
          $mBytes = 6;
        } else {
          /* Current octet is neither in the US-ASCII range nor a legal first
           * octet of a multi-octet sequence.
           */
          return false;
        }
      } else {
        // When mState is non-zero, we expect a continuation of the multi-octet
        // sequence
        if (0x80 === (0xC0 & $in)) {
          // Legal continuation.
          $shift = ($mState - 1) * 6;
          $tmp = $in;
          $tmp = ($tmp & 0x0000003F) << $shift;
          $mUcs4 |= $tmp;
          /**
           * End of the multi-octet sequence. mUcs4 now contains the final
           * Unicode code point to be output
           */
          if (0 === --$mState) {
            /*
            * Check for illegal sequences and code points.
            */
            // From Unicode 3.1, non-shortest form is illegal
            if (
                (2 === $mBytes && $mUcs4 < 0x0080) ||
                (3 === $mBytes && $mUcs4 < 0x0800) ||
                (4 === $mBytes && $mUcs4 < 0x10000) ||
                (4 < $mBytes) ||
                // From Unicode 3.2, surrogate characters are illegal.
                (($mUcs4 & 0xFFFFF800) === 0xD800) ||
                // Code points outside the Unicode range are illegal.
                ($mUcs4 > 0x10FFFF)
            ) {
              return false;
            }
            // initialize UTF8 cache
            $mState = 0;
            $mUcs4 = 0;
            $mBytes = 1;
          }
        } else {
          /**
           *((0xC0 & (*in) != 0x80) && (mState != 0))
           * Incomplete multi-octet sequence.
           */
          return false;
        }
      }
    }

    // A non-zero state means the string ended in the middle of a multi-octet
    // sequence (e.g. a lone "\xC3"), which is not valid UTF-8.
    return $mState === 0;
  }
3240
3241
  /**
3242
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3243
   * Decodes a JSON string
3244
   *
3245
   * @link http://php.net/manual/en/function.json-decode.php
3246
   *
3247
   * @param string $json    <p>
3248
   *                        The <i>json</i> string being decoded.
3249
   *                        </p>
3250
   *                        <p>
3251
   *                        This function only works with UTF-8 encoded strings.
3252
   *                        </p>
3253
   *                        <p>PHP implements a superset of
3254
   *                        JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3255
   *                        only supports these values when they are nested inside an array or an object.
3256
   *                        </p>
3257
   * @param bool   $assoc   [optional] <p>
3258
   *                        When <b>TRUE</b>, returned objects will be converted into
3259
   *                        associative arrays.
3260
   *                        </p>
3261
   * @param int    $depth   [optional] <p>
3262
   *                        User specified recursion depth.
3263
   *                        </p>
3264
   * @param int    $options [optional] <p>
3265
   *                        Bitmask of JSON decode options. Currently only
3266
   *                        <b>JSON_BIGINT_AS_STRING</b>
3267
   *                        is supported (default is to cast large integers as floats)
3268
   *                        </p>
3269 2
   *
3270
   * @return mixed the value encoded in <i>json</i> in appropriate
3271 2
   * PHP type. Values true, false and
3272
   * null (case-insensitive) are returned as <b>TRUE</b>, <b>FALSE</b>
3273 2
   * and <b>NULL</b> respectively. <b>NULL</b> is returned if the
3274
   * <i>json</i> cannot be decoded or if the encoded
3275
   * data is deeper than the recursion limit.
3276 2
   */
3277 View Code Duplication
  public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
  {
    // the input is passed through "UTF8::filter()" and cast to string before decoding
    $filtered = (string)self::filter($json);

    // "$options" is only forwarded when "Bootup::is_php('5.4')" reports true
    if (Bootup::is_php('5.4') !== true) {
      return json_decode($filtered, $assoc, $depth);
    }

    return json_decode($filtered, $assoc, $depth, $options);
  }
3289
3290
  /**
3291
   * (PHP 5 &gt;= 5.2.0, PECL json &gt;= 1.2.0)<br/>
3292
   * Returns the JSON representation of a value.
3293
   *
3294
   * @link http://php.net/manual/en/function.json-encode.php
3295
   *
3296
   * @param mixed $value   <p>
3297
   *                       The <i>value</i> being encoded. Can be any type except
3298
   *                       a resource.
3299
   *                       </p>
3300
   *                       <p>
3301
   *                       All string data must be UTF-8 encoded.
3302
   *                       </p>
3303
   *                       <p>PHP implements a superset of
3304
   *                       JSON - it will also encode and decode scalar types and <b>NULL</b>. The JSON standard
3305
   *                       only supports these values when they are nested inside an array or an object.
3306
   *                       </p>
3307
   * @param int   $options [optional] <p>
3308
   *                       Bitmask consisting of <b>JSON_HEX_QUOT</b>,
3309
   *                       <b>JSON_HEX_TAG</b>,
3310
   *                       <b>JSON_HEX_AMP</b>,
3311
   *                       <b>JSON_HEX_APOS</b>,
3312
   *                       <b>JSON_NUMERIC_CHECK</b>,
3313
   *                       <b>JSON_PRETTY_PRINT</b>,
3314
   *                       <b>JSON_UNESCAPED_SLASHES</b>,
3315
   *                       <b>JSON_FORCE_OBJECT</b>,
3316
   *                       <b>JSON_UNESCAPED_UNICODE</b>. The behaviour of these
3317
   *                       constants is described on
3318 2
   *                       the JSON constants page.
3319
   *                       </p>
3320 2
   * @param int   $depth   [optional] <p>
3321
   *                       Set the maximum depth. Must be greater than zero.
3322 2
   *                       </p>
3323
   *
3324
   * @return string a JSON encoded string on success or <b>FALSE</b> on failure.
3325 2
   */
3326 View Code Duplication
  public static function json_encode($value, $options = 0, $depth = 512)
  {
    // the value is passed through "UTF8::filter()" before encoding
    $filtered = self::filter($value);

    // "$depth" is only forwarded when "Bootup::is_php('5.5')" reports true
    if (Bootup::is_php('5.5') !== true) {
      return json_encode($filtered, $options);
    }

    return json_encode($filtered, $options, $depth);
  }
3338
3339
  /**
   * Lowercase the first character of a string and leave the rest untouched.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The resulting string.</p>
   */
  public static function lcfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // everything after the first character; "false" from substr() is treated as "nothing left"
    $tail = self::substr($str, 1, null, $encoding, $cleanUtf8);
    $tail = ($tail === false) ? '' : $tail;

    // the first character, lowercased
    $head = (string)self::substr($str, 0, 1, $encoding, $cleanUtf8);
    $head = self::strtolower($head, $encoding, $cleanUtf8);

    return $head . $tail;
  }
3363
3364
  /**
   * Alias of "UTF8::lcfirst()": lowercase the first character of the given word.
   *
   * @see UTF8::lcfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function lcword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $lowered = self::lcfirst($word, $encoding, $cleanUtf8);

    return $lowered;
  }
3379
3380
  /**
   * Lowercase the first character of every word in the string.
   *
   * The string is split via "UTF8::str_to_words()", every fragment that is not listed in
   * $exceptions is passed through "UTF8::lcfirst()" and the fragments are joined again
   * without a separator.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Words that must be left untouched (strict comparison).</p>
   * @param string   $charlist   [optional] <p>Additional chars that belong to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function lcwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if (!$str) {
      return '';
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // Falsy fragments ('' or '0') need no case change, but they must stay in the
      // result: skipping them silently removed every "0" token from the string.
      if (!$word) {
        $newWords[] = $word;
        continue;
      }

      if ($useExceptions === false || !in_array($word, $exceptions, true)) {
        $word = self::lcfirst($word, $encoding, $cleanUtf8);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
3429
3430 24
  /**
   * Strip whitespace or other characters from beginning of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Characters to be stripped; if omitted or empty, all leading
   *                      separator (\pZ) and "other"/control (\pC) characters are removed.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the left.</p>
   */
  public static function ltrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is a valid character list, so make sure it is never mistaken for "no list given"
    if ($chars === 0) {
      $chars = '0';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/^[\pZ\pC]+/u', '', $str);
    }

    return preg_replace('/^' . self::rxClass($chars) . '+/u', '', $str);
  }
3453 1
3454
  /**
   * Returns the UTF-8 character with the maximum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the highest code point,<br />
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function max($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native max() must not be called with an empty list (warning / ValueError on PHP 8)
    if (!$codepoints) {
      return '';
    }

    return self::chr(max($codepoints));
  }
3469
3470 1
  /**
   * Determines the largest byte length reported by "UTF8::chr_size_list()"
   * for the characters of the given string.
   *
   * @param string $str <p>The original Unicode string.</p>
   *
   * @return int <p>Max byte length of the given chars, 0 if the size list is empty.</p>
   */
  public static function max_chr_width($str)
  {
    $sizes = self::chr_size_list($str);

    return count($sizes) > 0 ? (int)max($sizes) : 0;
  }
3487 15
3488
  /**
   * Checks whether mbstring is available on the server.
   *
   * Side effect: if the extension is loaded, the mbstring internal encoding is set to UTF-8.
   *
   * @return bool <p><strong>true</strong> if available, <strong>false</strong> otherwise.</p>
   */
  public static function mbstring_loaded()
  {
    if (extension_loaded('mbstring') !== true) {
      return false;
    }

    \mb_internal_encoding('UTF-8');

    return true;
  }
3503 1
3504
  /**
   * Returns the UTF-8 character with the minimum code point in the given data.
   *
   * @param mixed $arg <p>A UTF-8 encoded string or an array of such strings.</p>
   *
   * @return string <p>
   *                The character with the lowest code point,<br />
   *                or an empty string if the input contains no characters.
   *                </p>
   */
  public static function min($arg)
  {
    if (is_array($arg) === true) {
      $arg = implode('', $arg);
    }

    $codepoints = self::codepoints($arg);

    // the native min() must not be called with an empty list (warning / ValueError on PHP 8)
    if (!$codepoints) {
      return '';
    }

    return self::chr(min($codepoints));
  }
3519
3520
  /**
   * Alias of "UTF8::normalize_encoding()", kept for backward compatibility.
   *
   * @see UTF8::normalize_encoding()
   *
   * @param string $encoding
   * @param mixed  $fallback
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::normalize_encoding()"</p>
   */
  public static function normalizeEncoding($encoding, $fallback = false)
  {
    $normalized = self::normalize_encoding($encoding, $fallback);

    return $normalized;
  }
3536
3537 77
  /**
   * Normalize the encoding-"name" input.
   *
   * Lookup order: empty input returns $fallback; "UTF-8" and every name listed in
   * self::$ICONV_ENCODING are returned unchanged; otherwise the name is uppercased, reduced to
   * its letters and digits and looked up in the alias table below. Names without an alias are
   * returned uppercased. Results are memoized per input string.
   *
   * @param string $encoding <p>e.g.: ISO, UTF8, WINDOWS-1251 etc.</p>
   * @param mixed  $fallback <p>e.g.: UTF-8</p>
   *
   * @return string|mixed <p>e.g.: ISO-8859-1, UTF-8, WINDOWS-1251 etc.; $fallback if $encoding is empty.</p>
   */
  public static function normalize_encoding($encoding, $fallback = false)
  {
    static $STATIC_NORMALIZE_ENCODING_CACHE = array();

    if (!$encoding) {
      return $fallback;
    }

    if ('UTF-8' === $encoding) {
      return $encoding;
    }

    if (in_array($encoding, self::$ICONV_ENCODING, true)) {
      return $encoding;
    }

    if (isset($STATIC_NORMALIZE_ENCODING_CACHE[$encoding])) {
      return $STATIC_NORMALIZE_ENCODING_CACHE[$encoding];
    }

    $encodingOrig = $encoding;
    $encoding = strtoupper($encoding);

    // Build the lookup key from letters and digits only. Whitespace is stripped as well,
    // so that spellings like "ISO 8859 1" or " utf8 " resolve to their canonical name.
    $encodingUpperHelper = preg_replace('/[^a-zA-Z0-9]/', '', $encoding);

    $equivalences = array(
        'ISO8859'     => 'ISO-8859-1',
        'ISO88591'    => 'ISO-8859-1',
        'ISO'         => 'ISO-8859-1',
        'LATIN'       => 'ISO-8859-1',
        'LATIN1'      => 'ISO-8859-1', // Western European
        'ISO88592'    => 'ISO-8859-2',
        'LATIN2'      => 'ISO-8859-2', // Central European
        'ISO88593'    => 'ISO-8859-3',
        'LATIN3'      => 'ISO-8859-3', // Southern European
        'ISO88594'    => 'ISO-8859-4',
        'LATIN4'      => 'ISO-8859-4', // Northern European
        'ISO88595'    => 'ISO-8859-5', // Cyrillic
        'ISO88596'    => 'ISO-8859-6', // Arabic
        'ISO88597'    => 'ISO-8859-7', // Greek
        'ISO88598'    => 'ISO-8859-8', // Hebrew
        'ISO88599'    => 'ISO-8859-9',
        'LATIN5'      => 'ISO-8859-9', // Turkish
        'ISO885911'   => 'ISO-8859-11',
        'TIS620'      => 'ISO-8859-11', // Thai
        'ISO885910'   => 'ISO-8859-10',
        'LATIN6'      => 'ISO-8859-10', // Nordic
        'ISO885913'   => 'ISO-8859-13',
        'LATIN7'      => 'ISO-8859-13', // Baltic
        'ISO885914'   => 'ISO-8859-14',
        'LATIN8'      => 'ISO-8859-14', // Celtic
        'ISO885915'   => 'ISO-8859-15',
        'LATIN9'      => 'ISO-8859-15', // Western European (with some extra chars e.g. €)
        'ISO885916'   => 'ISO-8859-16',
        'LATIN10'     => 'ISO-8859-16', // Southeast European
        'CP1250'      => 'WINDOWS-1250',
        'WIN1250'     => 'WINDOWS-1250',
        'WINDOWS1250' => 'WINDOWS-1250',
        'CP1251'      => 'WINDOWS-1251',
        'WIN1251'     => 'WINDOWS-1251',
        'WINDOWS1251' => 'WINDOWS-1251',
        'CP1252'      => 'WINDOWS-1252',
        'WIN1252'     => 'WINDOWS-1252',
        'WINDOWS1252' => 'WINDOWS-1252',
        'CP1253'      => 'WINDOWS-1253',
        'WIN1253'     => 'WINDOWS-1253',
        'WINDOWS1253' => 'WINDOWS-1253',
        'CP1254'      => 'WINDOWS-1254',
        'WIN1254'     => 'WINDOWS-1254',
        'WINDOWS1254' => 'WINDOWS-1254',
        'CP1255'      => 'WINDOWS-1255',
        'WIN1255'     => 'WINDOWS-1255',
        'WINDOWS1255' => 'WINDOWS-1255',
        'CP1256'      => 'WINDOWS-1256',
        'WIN1256'     => 'WINDOWS-1256',
        'WINDOWS1256' => 'WINDOWS-1256',
        'CP1257'      => 'WINDOWS-1257',
        'WIN1257'     => 'WINDOWS-1257',
        'WINDOWS1257' => 'WINDOWS-1257',
        'CP1258'      => 'WINDOWS-1258',
        'WIN1258'     => 'WINDOWS-1258',
        'WINDOWS1258' => 'WINDOWS-1258',
        'UTF16'       => 'UTF-16',
        'UTF32'       => 'UTF-32',
        'UTF8'        => 'UTF-8',
        'UTF'         => 'UTF-8',
        'UTF7'        => 'UTF-7',
        '8BIT'        => 'CP850',
        'BINARY'      => 'CP850',
    );

    if (!empty($equivalences[$encodingUpperHelper])) {
      $encoding = $equivalences[$encodingUpperHelper];
    }

    // memoize under the caller's original spelling
    $STATIC_NORMALIZE_ENCODING_CACHE[$encodingOrig] = $encoding;

    return $encoding;
  }
3643 2
3644 2
  /**
   * Normalize some MS Word special characters.
   *
   * Every key of self::$UTF8_MSWORD found in the string is replaced by its mapped value.
   *
   * @param string $str <p>The string to be normalized.</p>
   *
   * @return string
   */
  public static function normalize_msword($str)
  {
    $text = (string)$str;

    if ($text === '') {
      return '';
    }

    // search/replace lists are derived from the table only once per request
    static $UTF8_MSWORD_KEYS_CACHE = null;
    static $UTF8_MSWORD_VALUES_CACHE = null;

    if (null === $UTF8_MSWORD_KEYS_CACHE) {
      $UTF8_MSWORD_VALUES_CACHE = array_values(self::$UTF8_MSWORD);
      $UTF8_MSWORD_KEYS_CACHE = array_keys(self::$UTF8_MSWORD);
    }

    $normalized = str_replace($UTF8_MSWORD_KEYS_CACHE, $UTF8_MSWORD_VALUES_CACHE, $text);

    return $normalized;
  }
3669
3670 12
  /**
   * Normalize the whitespace.
   *
   * Every entry of self::$WHITESPACE_TABLE found in the string is replaced by a plain space;
   * unless requested otherwise, the entries of self::$BIDI_UNI_CODE_CONTROLS_TABLE are removed first.
   *
   * @param string $str                     <p>The string to be normalized.</p>
   * @param bool   $keepNonBreakingSpace    [optional] <p>Set to true, to keep non-breaking-spaces.</p>
   * @param bool   $keepBidiUnicodeControls [optional] <p>Set to true, to keep non-printable (for the web)
   *                                        bidirectional text chars.</p>
   *
   * @return string
   */
  public static function normalize_whitespace($str, $keepNonBreakingSpace = false, $keepBidiUnicodeControls = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    static $WHITESPACE_CACHE = array();

    // Derive the cache slot from the same strict check that decides whether the
    // non-breaking space is kept. With an "(int)" cast, a truthy non-bool argument
    // (e.g. 1) filled slot 1 with the full table and broke later calls with "true".
    $keepNbsp = ($keepNonBreakingSpace === true);
    $cacheKey = $keepNbsp ? 1 : 0;

    if (!isset($WHITESPACE_CACHE[$cacheKey])) {

      $whitespaceTable = self::$WHITESPACE_TABLE;

      if ($keepNbsp === true) {
        unset($whitespaceTable['NO-BREAK SPACE']);
      }

      $WHITESPACE_CACHE[$cacheKey] = array_values($whitespaceTable);
    }

    if ($keepBidiUnicodeControls === false) {
      static $BIDI_UNICODE_CONTROLS_CACHE = null;

      if ($BIDI_UNICODE_CONTROLS_CACHE === null) {
        $BIDI_UNICODE_CONTROLS_CACHE = array_values(self::$BIDI_UNI_CODE_CONTROLS_TABLE);
      }

      $str = str_replace($BIDI_UNICODE_CONTROLS_CACHE, '', $str);
    }

    return str_replace($WHITESPACE_CACHE[$cacheKey], ' ', $str);
  }
3715
3716
  /**
   * Remove every run of characters matched by the "[[:space:]]" class (in PCRE UTF-8 mode)
   * from the string, e.g. spaces, tabs and newline characters.
   *
   * @param string $str
   *
   * @return string
   */
  public static function strip_whitespace($str)
  {
    $text = (string)$str;

    return $text === '' ? '' : (string)preg_replace('/[[:space:]]+/u', '', $text);
  }
3735 1
3736
  /**
   * Format a number with grouped thousands.
   *
   * The native number_format() honours multi-byte separators only since PHP 5.4; on older
   * versions the number is formatted with the ASCII separators first and the separators
   * are swapped afterwards.
   *
   * @param float  $number
   * @param int    $decimals
   * @param string $dec_point
   * @param string $thousands_sep
   *
   * @return string
   *
   * @deprecated <p>This has nothing to do with UTF-8.</p>
   */
  public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
  {
    $thousands_sep = (string)$thousands_sep;
    $dec_point = (string)$dec_point;
    $number = (float)$number;

    if (
        Bootup::is_php('5.4') !== true
        &&
        // one multi-byte separator is enough to need the workaround
        (isset($thousands_sep[1]) || isset($dec_point[1]))
    ) {
      // strtr() swaps both separators in a single pass, so a separator that itself
      // contains "." or "," is not replaced a second time (as chained str_replace would do)
      return strtr(
          number_format($number, $decimals, '.', ','),
          array(
              '.' => $dec_point,
              ',' => $thousands_sep,
          )
      );
    }

    return number_format($number, $decimals, $dec_point, $thousands_sep);
  }
3774 10
3775 6
  /**
   * Calculates Unicode code point of the given UTF-8 encoded character.
   *
   * INFO: opposite to UTF8::chr()
   *
   * Only the first (up to) four bytes of the input are decoded; the lead byte decides how
   * many of the following bytes are combined. Continuation bytes are not validated.
   *
   * @param string      $chr      <p>The character of which to calculate code point.<p/>
   * @param string|null $encoding [optional] <p>Default is UTF-8</p>
   *
   * @return int <p>
   *             Unicode code point of the given character,<br />
   *             0 for an empty input.
   *             </p>
   */
  public static function ord($chr, $encoding = 'UTF-8')
  {
    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');

      // convert the input only if the normalized name is still not UTF-8
      /** @noinspection NotOptimalIfConditionsInspection */
      if ($encoding !== 'UTF-8') {
        $chr = (string)\mb_convert_encoding($chr, 'UTF-8', $encoding);
      }
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // prefer "\IntlChar" and fall back to the manual decoding if it yields nothing usable
    if (self::$SUPPORT['intlChar'] === true) {
      $intlCodePoint = \IntlChar::ord($chr);
      if ($intlCodePoint) {
        return $intlCodePoint;
      }
    }

    // static cache for the manual decoding, keyed by the (converted) input
    static $CHAR_CACHE = array();
    if (isset($CHAR_CACHE[$chr]) === true) {
      return $CHAR_CACHE[$chr];
    }

    // 1-based list of the first (up to) four bytes
    $bytes = unpack('C*', (string)self::substr($chr, 0, 4, '8BIT'));
    $lead = $bytes ? $bytes[1] : 0;

    if ($lead >= 0xF0 && isset($bytes[4])) {
      // four-byte sequence
      $codePoint = (($lead - 0xF0) << 18) + (($bytes[2] - 0x80) << 12) + (($bytes[3] - 0x80) << 6) + $bytes[4] - 0x80;
    } elseif ($lead >= 0xE0 && isset($bytes[3])) {
      // three-byte sequence
      $codePoint = (($lead - 0xE0) << 12) + (($bytes[2] - 0x80) << 6) + $bytes[3] - 0x80;
    } elseif ($lead >= 0xC0 && isset($bytes[2])) {
      // two-byte sequence
      $codePoint = (($lead - 0xC0) << 6) + $bytes[2] - 0x80;
    } else {
      // single byte (or nothing at all)
      $codePoint = $lead;
    }

    $CHAR_CACHE[$chr] = $codePoint;

    return $codePoint;
  }
3836 1
3837 1
  /**
   * Parses the string into an array (into the second parameter).
   *
   * WARNING: Unlike the native "parse_str()", this method never creates variables in the current scope;
   *          the parsed data is only available via the second parameter.
   *
   * @link http://php.net/manual/en/function.parse-str.php
   *
   * @param string  $str       <p>The input string.</p>
   * @param array   $result    <p>The result will be returned into this reference parameter.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return bool <p>Will return <strong>false</strong> if php can't parse the string or if $result is empty.</p>
   */
  public static function parse_str($str, &$result, $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    /** @noinspection PhpVoidFunctionResultUsedInspection */
    $parsed = \mb_parse_str($str, $result);

    return $parsed !== false && !empty($result);
  }
3865 1
3866 1
  /**
   * Checks if the "u" pattern modifier (Unicode support in PCRE) is available.
   *
   * @return bool <p><strong>true</strong> if support is available, <strong>false</strong> otherwise.</p>
   */
  public static function pcre_utf8_support()
  {
    // an empty pattern with the "u" modifier only matches if PCRE accepts the modifier
    /** @noinspection PhpUsageOfSilenceOperatorInspection */
    $matched = @preg_match('//u', '');

    return (bool)$matched;
  }
3876
3877
  /**
   * Create an array containing a range of UTF-8 characters.
   *
   * Each boundary is interpreted in this order: decimal digits only => code point,
   * hexadecimal digits only => hexadecimal code point, anything else => code point
   * of the given character.
   *
   * @param mixed $var1 <p>Numeric or hexadecimal code points, or a UTF-8 character to start from.</p>
   * @param mixed $var2 <p>Numeric or hexadecimal code points, or a UTF-8 character to end at.</p>
   *
   * @return array <p>Empty if a boundary is empty or resolves to 0.</p>
   */
  public static function range($var1, $var2)
  {
    if (!$var1 || !$var2) {
      return array();
    }

    $bounds = array();

    // resolve start and end with the same rules, bailing out on the first unusable one
    foreach (array($var1, $var2) as $boundary) {
      if (ctype_digit((string)$boundary)) {
        $codePoint = (int)$boundary;
      } elseif (ctype_xdigit($boundary)) {
        $codePoint = (int)self::hex_to_int($boundary);
      } else {
        $codePoint = self::ord($boundary);
      }

      if (!$codePoint) {
        return array();
      }

      $bounds[] = $codePoint;
    }

    $toChar = array(
        '\\voku\\helper\\UTF8',
        'chr',
    );

    return array_map($toChar, range($bounds[0], $bounds[1]));
  }
3923
3924
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test+test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                   => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible.</p>
   *
   * @return string
   */
  public static function rawurldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" escapes into hexadecimal html entities, so the loop below can decode them
    $unicodeEscape = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscape, $str)) {
      $str = preg_replace($unicodeEscape, '&#x\\1;', rawurldecode($str));
    }

    if (Bootup::is_php('5.4') === true) {
      $flags = ENT_QUOTES | ENT_HTML5;
    } else {
      $flags = ENT_QUOTES;
    }

    // decode once, or (with $multi_decode) until a pass no longer changes the string
    do {
      $previous = $str;

      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(rawurldecode($entityDecoded));

    } while ($multi_decode === true && $previous !== $str);

    return (string)$str;
  }
3974 1
3975 1
  /**
   * Alias of "UTF8::remove_bom()", kept for backward compatibility.
   *
   * @see UTF8::remove_bom()
   *
   * @param string $str
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::remove_bom()"</p>
   */
  public static function removeBOM($str)
  {
    $withoutBom = self::remove_bom($str);

    return $withoutBom;
  }
3990
3991
  /**
   * Remove the BOM from UTF-8 / UTF-16 / UTF-32 strings.
   *
   * Every entry of self::$BOM (byte sequence => byte length) that the string
   * starts with is cut off, one after the other.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>String without UTF-BOM</p>
   */
  public static function remove_bom($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    foreach (self::$BOM as $bomString => $bomByteLength) {
      // byte-wise check: not a prefix => nothing to strip for this BOM
      if (self::strpos($str, $bomString, 0, '8BIT') !== 0) {
        continue;
      }

      $remainder = self::substr($str, $bomByteLength, null, '8BIT');
      $str = ($remainder === false) ? '' : (string)$remainder;
    }

    return $str;
  }
4018
4019 57
  /**
   * Collapses directly repeated occurrences of a string inside another string
   * into a single occurrence.
   *
   * @param string          $str  <p>The base string.</p>
   * @param string|string[] $what <p>String(s) to search for in the base string.</p>
   *
   * @return string <p>The result string with removed duplicates.</p>
   */
  public static function remove_duplicates($str, $what = ' ')
  {
    if (is_string($what) === true) {
      $what = array($what);
    }

    // anything that is neither a string nor an array leaves the input untouched
    if (is_array($what) !== true) {
      return $str;
    }

    /** @noinspection ForeachSourceInspection */
    foreach ($what as $needle) {
      $pattern = '/(' . preg_quote($needle, '/') . ')+/';
      $str = preg_replace($pattern, $needle, $str);
    }

    return $str;
  }
4042 57
4043 57
  /**
   * Remove invisible characters from a string.
   *
   * e.g.: This prevents sandwiching null characters between ascii characters, like Java\0script.
   *
   * Removed are all control characters except newline (dec 10), carriage return (dec 13)
   * and horizontal tab (dec 09), plus DEL (dec 127). The replacement is repeated until a
   * pass no longer changes anything, so removed characters cannot uncover new matches.
   *
   * copy&paste from https://github.com/bcit-ci/CodeIgniter/blob/develop/system/core/Common.php
   *
   * @param string $str
   * @param bool   $url_encoded [optional] <p>Also remove the url encoded form (e.g. "%0B") of these characters.</p>
   * @param string $replacement [optional] <p>The replacement for every removed character sequence.</p>
   *
   * @return string
   */
  public static function remove_invisible_characters($str, $url_encoded = true, $replacement = '')
  {
    // init
    $non_displayables = array();

    // Percent-encoding is case-insensitive ("%0b" === "%0B"), so the patterns must be as well;
    // "%7f" is covered too, to stay in line with the raw byte class below.
    if ($url_encoded) {
      $non_displayables[] = '/%0[0-8bcef]/i'; // url encoded 00-08, 11, 12, 14, 15
      $non_displayables[] = '/%1[0-9a-f]/i'; // url encoded 16-31
      $non_displayables[] = '/%7f/i'; // url encoded 127
    }

    $non_displayables[] = '/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S'; // 00-08, 11, 12, 14-31, 127

    do {
      $str = preg_replace($non_displayables, $replacement, $str, -1, $count);
    } while ($count !== 0);

    return $str;
  }
4076
4077 23
  /**
   * Replace the diamond question mark (U+FFFD) and invalid-UTF8 chars with the replacement.
   *
   * @param string $str                <p>The input string</p>
   * @param string $replacementChar    [optional] <p>The replacement character(s), empty by default (= remove).</p>
   * @param bool   $processInvalidUtf8 [optional] <p>Also replace invalid UTF-8 byte sequences.</p>
   *
   * @return string
   */
  public static function replace_diamond_question_mark($str, $replacementChar = '', $processInvalidUtf8 = true)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($processInvalidUtf8 === true) {

      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      // "mb_substitute_character()" only accepts a code point or a keyword, never an
      // arbitrary string. So invalid sequences are first turned into U+FFFD and the
      // final str_replace() below swaps them for the real replacement.
      $save = \mb_substitute_character();
      \mb_substitute_character(0xFFFD);
      $str = (string)\mb_convert_encoding($str, 'UTF-8', 'UTF-8');
      \mb_substitute_character($save);
    }

    // "\xEF\xBF\xBD" is the UTF-8 encoding of U+FFFD
    return str_replace("\xEF\xBF\xBD", $replacementChar, $str);
  }
4122 2
4123
  /**
   * Strip whitespace or other characters from end of a UTF-8 string.
   *
   * @param string $str   <p>The string to be trimmed.</p>
   * @param string $chars [optional] <p>Characters to be stripped; if omitted or empty, all trailing
   *                      separator (\pZ) and "other"/control (\pC) characters are removed.</p>
   *
   * @return string <p>The string with unwanted characters stripped from the right.</p>
   */
  public static function rtrim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // "0" is a valid character list, so make sure it is never mistaken for "no list given"
    if ($chars === 0) {
      $chars = '0';
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    if ($chars === INF || (!$chars && $chars !== '0')) {
      return preg_replace('/[\pZ\pC]+$/u', '', $str);
    }

    return preg_replace('/' . self::rxClass($chars) . '+$/u', '', $str);
  }
4146
4147
  /**
   * Builds a regex fragment that matches any of the characters of $s.
   *
   * Elements of up to two bytes are quoted and collected in a bracket expression ("-" is moved
   * to its front), longer elements of length 1 are added unquoted, and everything else becomes
   * an alternative next to the bracket expression. The result is cached per "$s . $class".
   *
   * @param string $s     <p>The characters to match.</p>
   * @param string $class [optional] <p>Initial content of the bracket expression.</p>
   *
   * @return string <p>Either a single fragment or a non-capturing "(?:a|b)" group.</p>
   */
  private static function rxClass($s, $class = '')
  {
    static $RX_CLASSS_CACHE = array();

    $cacheKey = $s . $class;

    if (isset($RX_CLASSS_CACHE[$cacheKey])) {
      return $RX_CLASSS_CACHE[$cacheKey];
    }

    // content of the bracket expression and the elements that cannot live inside it
    $charClass = $class;
    $alternatives = array();

    foreach (self::str_split($s) as $char) {
      if ('-' === $char) {
        $charClass = '-' . $charClass;
      } elseif (!isset($char[2])) {
        $charClass .= preg_quote($char, '/');
      } elseif (1 === self::strlen($char)) {
        $charClass .= $char;
      } else {
        $alternatives[] = $char;
      }
    }

    if ($charClass) {
      $charClass = '[' . $charClass . ']';
    }

    if (count($alternatives) === 0) {
      $return = $charClass;
    } else {
      $return = '(?:' . implode('|', array_merge(array($charClass), $alternatives)) . ')';
    }

    $RX_CLASSS_CACHE[$cacheKey] = $return;

    return $return;
  }
4195
4196
  /**
   * WARNING: Prints every entry of the internal support-table, e.g. for debugging.
   *
   * The support-check is triggered first, if it was not done already.
   */
  public static function showSupport()
  {
    $alreadyChecked = isset(self::$SUPPORT['already_checked_via_portable_utf8']);
    if ($alreadyChecked === false) {
      self::checkForSupport();
    }

    $lineEnd = "\n<br>";
    foreach (self::$SUPPORT as $supportValue) {
      echo $supportValue . $lineEnd;
    }
  }
4209
4210
  /**
   * Converts a UTF-8 character to HTML Numbered Entity like "&#123;".
   *
   * @param string $char           <p>The Unicode character to be encoded as numbered entity.</p>
   * @param bool   $keepAsciiChars <p>Set to <strong>true</strong> to keep ASCII chars.</p>
   * @param string $encoding       [optional] <p>Default is UTF-8</p>
   *
   * @return string <p>The HTML numbered entity, the unchanged ASCII char or an empty string for empty input.</p>
   */
  public static function single_chr_html_encode($char, $keepAsciiChars = false, $encoding = 'UTF-8')
  {
    $char = (string)$char;

    // nothing to encode
    if (!isset($char[0])) {
      return '';
    }

    // ASCII stays as it is, if requested
    if ($keepAsciiChars === true && self::is_ascii($char) === true) {
      return $char;
    }

    // only non-default encodings need to be normalized
    $usedEncoding = ($encoding !== 'UTF-8') ? self::normalize_encoding($encoding, 'UTF-8') : $encoding;

    return '&#' . self::ord($char, $usedEncoding) . ';';
  }
4241
4242
  /**
   * Convert a string to an array of Unicode characters.
   *
   * With PCRE-UTF-8 support the string is split via regex, otherwise a byte-wise
   * fallback collects well-formed 1- to 4-byte sequences and skips everything else.
   *
   * @param string  $str       <p>The string to split into array.</p>
   * @param int     $length    [optional] <p>Max character length of each array element.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string[] <p>An array containing chunks of the string.</p>
   */
  public static function split($str, $length = 1, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    // array_chunk() below needs a real integer
    $length = (int)$length;

    // The support-table is checked once, here; both branches below rely on it.
    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // init
    $ret = array();

    if (self::$SUPPORT['pcre_utf8'] === true) {

      preg_match_all('/./us', $str, $retArray);
      if (isset($retArray[0])) {
        $ret = $retArray[0];
      }
      unset($retArray);

    } else {

      // fallback: walk over the bytes and collect well-formed sequences

      if (self::$SUPPORT['mbstring_func_overload'] === true) {
        $len = \mb_strlen($str, '8BIT');
      } else {
        $len = strlen($str);
      }

      for ($i = 0; $i < $len; $i++) {

        if (($str[$i] & "\x80") === "\x00") {

          // 0xxxxxxx: single byte
          $ret[] = $str[$i];

        } elseif (
            isset($str[$i + 1])
            &&
            ($str[$i] & "\xE0") === "\xC0"
        ) {

          // 110xxxxx: lead byte of a 2-byte sequence
          if (($str[$i + 1] & "\xC0") === "\x80") {
            $ret[] = $str[$i] . $str[$i + 1];

            $i++;
          }

        } elseif (
            isset($str[$i + 2])
            &&
            ($str[$i] & "\xF0") === "\xE0"
        ) {

          // 1110xxxx: lead byte of a 3-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2];

            $i += 2;
          }

        } elseif (
            isset($str[$i + 3])
            &&
            ($str[$i] & "\xF8") === "\xF0"
        ) {

          // 11110xxx: lead byte of a 4-byte sequence
          if (
              ($str[$i + 1] & "\xC0") === "\x80"
              &&
              ($str[$i + 2] & "\xC0") === "\x80"
              &&
              ($str[$i + 3] & "\xC0") === "\x80"
          ) {
            $ret[] = $str[$i] . $str[$i + 1] . $str[$i + 2] . $str[$i + 3];

            $i += 3;
          }

        }
      }
    }

    if ($length > 1) {
      // glue "$length" characters together per element
      $ret = array_chunk($ret, $length);

      return array_map(
          function ($item) {
            return implode('', $item);
          }, $ret
      );
    }

    if (isset($ret[0]) && $ret[0] === '') {
      return array();
    }

    return $ret;
  }
4365
4366
  /**
   * Optimized "\mb_detect_encoding()"-function -> with support for UTF-16 and UTF-32.
   *
   * The checks run in this order: binary strings (UTF-16 / UTF-32), ASCII, UTF-8,
   * "\mb_detect_encoding()" with a fixed encoding-list and at last a round-trip via "iconv()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return false|string <p>
   *                      The detected string-encoding e.g. UTF-8 or UTF-16BE,<br>
   *                      otherwise it will return false.
   *                      </p>
   */
  public static function str_detect_encoding($str)
  {
    //
    // 1.) check binary strings (010001001...) like UTF-16 / UTF-32
    //

    if (self::is_binary($str) === true) {

      // every check is evaluated only once: 1 => little endian, 2 => big endian
      $utf16 = self::is_utf16($str);
      if ($utf16 === 1) {
        return 'UTF-16LE';
      }

      if ($utf16 === 2) {
        return 'UTF-16BE';
      }

      $utf32 = self::is_utf32($str);
      if ($utf32 === 1) {
        return 'UTF-32LE';
      }

      if ($utf32 === 2) {
        return 'UTF-32BE';
      }

    }

    //
    // 2.) simple check for ASCII chars
    //

    if (self::is_ascii($str) === true) {
      return 'ASCII';
    }

    //
    // 3.) simple check for UTF-8 chars
    //

    if (self::is_utf8($str) === true) {
      return 'UTF-8';
    }

    //
    // 4.) check via "\mb_detect_encoding()"
    //
    // INFO: UTF-16, UTF-32, UCS2 and UCS4, encoding detection will fail always with "\mb_detect_encoding()"

    $detectOrder = array(
        'ISO-8859-1',
        'ISO-8859-2',
        'ISO-8859-3',
        'ISO-8859-4',
        'ISO-8859-5',
        'ISO-8859-6',
        'ISO-8859-7',
        'ISO-8859-8',
        'ISO-8859-9',
        'ISO-8859-10',
        'ISO-8859-13',
        'ISO-8859-14',
        'ISO-8859-15',
        'ISO-8859-16',
        'WINDOWS-1251',
        'WINDOWS-1252',
        'WINDOWS-1254',
        'ISO-2022-JP',
        'JIS',
        'EUC-JP',
    );

    $encoding = \mb_detect_encoding($str, $detectOrder, true);
    if ($encoding) {
      return $encoding;
    }

    //
    // 5.) check via "iconv()"
    //
    // An encoding is accepted, if a conversion into itself leaves the string untouched.

    $strAsString = (string)$str;
    foreach (self::$ICONV_ENCODING as $encodingTmp) {
      # INFO: //IGNORE and //TRANSLIT still throw notice
      /** @noinspection PhpUsageOfSilenceOperatorInspection */
      $roundTrip = @\iconv($encodingTmp, $encodingTmp . '//IGNORE', $str);

      // direct comparison instead of comparing two md5-hashes; a failed conversion (false) counts as ""
      if ((string)$roundTrip === $strAsString) {
        return $encodingTmp;
      }
    }

    return false;
  }
4466
4467
  /**
   * Check if the string ends with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_ends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // cut as many characters from the end as the needle is long
    $tail = self::substr($haystack, -self::strlen($needle));

    return $tail !== false && $tail === $needle;
  }
4495
4496
  /**
   * Check if the string ends with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_iends_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // cut as many characters from the end as the needle is long
    $haystackSub = self::substr($haystack, -self::strlen($needle));

    // "UTF8::substr()" may fail with false, which must not reach the string comparison
    if ($haystackSub === false) {
      return false;
    }

    return self::strcasecmp($haystackSub, $needle) === 0;
  }
4519 2
4520
  /**
   * Case-insensitive and UTF-8 safe version of <function>str_replace</function>.
   *
   * The search terms are converted into quoted, case-insensitive regular expressions.
   * The replacements are taken literally: "$" and "\" are not handled as references to sub-patterns.
   *
   * @link  http://php.net/manual/en/function.str-ireplace.php
   *
   * @param mixed $search  <p>
   *                       Every replacement with search array is
   *                       performed on the result of previous replacement.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value(s).
   *                       </p>
   * @param mixed $subject <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] <p>
   *                       The number of matched and replaced needles will
   *                       be returned in count which is passed by
   *                       reference.
   *                       </p>
   *
   * @return mixed <p>A string or an array of replacements.</p>
   */
  public static function str_ireplace($search, $replace, $subject, &$count = null)
  {
    // build one pattern per search term
    $patterns = array();
    foreach ((array)$search as $key => $term) {
      $term = (string)$term;

      if ('' === $term) {
        // an empty search term must never match
        $patterns[$key] = '/^(?<=.)$/';
      } else {
        $patterns[$key] = '/' . preg_quote($term, '/') . '/ui';
      }
    }

    // "preg_replace()" interprets "$n" and "\n" in the replacement as references,
    // so both characters are escaped to keep the replacement literal.
    if (is_array($replace)) {
      $replacements = array();
      foreach ($replace as $key => $value) {
        $replacements[$key] = addcslashes((string)$value, '\\$');
      }
    } else {
      $replacements = addcslashes((string)$replace, '\\$');
    }

    $replaced = 0;
    $subject = preg_replace($patterns, $replacements, $subject, -1, $replaced);
    $count = $replaced; // used as reference parameter

    return $subject;
  }
4563 1
4564 1
  /**
   * Check if the string starts with the given substring, case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_istarts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // a match at the very first position is required
    return self::stripos($haystack, $needle) === 0;
  }
4587
4588
  /**
   * Limit the number of characters in a string, but cut at a word boundary (space).
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $length   <p>Max. number of characters, strings up to this length are returned as they are.</p>
   * @param string $strAddOn <p>Is appended, if the string was shortened.</p>
   *
   * @return string
   */
  public static function str_limit_after_word($str, $length = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    $length = (int)$length;

    // short enough, nothing to do
    if (self::strlen($str) <= $length) {
      return $str;
    }

    // the limit is directly behind a word
    if (self::substr($str, $length - 1, 1) === ' ') {
      return (string)self::substr($str, 0, $length - 1) . $strAddOn;
    }

    // cut hard and then drop the last (maybe partial) word
    $truncated = (string)self::substr($str, 0, $length);
    $words = explode(' ', $truncated);
    array_pop($words);
    $withoutLastWord = implode(' ', $words);

    // there was only one word: keep the hard cut
    if ($withoutLastWord === '') {
      return (string)self::substr($truncated, 0, $length - 1) . $strAddOn;
    }

    return $withoutLastWord . $strAddOn;
  }
4628 2
4629
  /**
   * Pad a UTF-8 string to given length with another string.
   *
   * The input is returned unchanged, if "$pad_length" is not a positive integer, if
   * it is smaller than the length of the input or if "$pad_string" is empty.
   *
   * @param string $str        <p>The input string.</p>
   * @param int    $pad_length <p>The length of return string.</p>
   * @param string $pad_string [optional] <p>String to use for padding the input string.</p>
   * @param int    $pad_type   [optional] <p>
   *                           Can be <strong>STR_PAD_RIGHT</strong> (default),
   *                           <strong>STR_PAD_LEFT</strong> or <strong>STR_PAD_BOTH</strong>
   *                           </p>
   *
   * @return string <strong>Returns the padded string</strong>
   */
  public static function str_pad($str, $pad_length, $pad_string = ' ', $pad_type = STR_PAD_RIGHT)
  {
    $str_length = self::strlen($str);

    if (
        is_int($pad_length) !== true
        ||
        $pad_length <= 0
        ||
        $pad_length < $str_length
    ) {
      return $str;
    }

    // An empty pad-string can not fill anything and would cause a division by zero below.
    $ps_length = (int)self::strlen($pad_string);
    if ($ps_length < 1) {
      return $str;
    }

    // number of characters that are missing
    $diff = $pad_length - $str_length;

    switch ($pad_type) {
      case STR_PAD_LEFT:
        $preLength = $diff;
        $postLength = 0;
        break;

      case STR_PAD_BOTH:
        // the right side gets the extra character, if the difference is odd
        $preLength = (int)floor($diff / 2);
        $postLength = $diff - $preLength;
        break;

      case STR_PAD_RIGHT:
      default:
        $preLength = 0;
        $postLength = $diff;
    }

    $pre = '';
    if ($preLength > 0) {
      // repeat often enough, then cut to the exact number of characters
      $pre = str_repeat($pad_string, (int)ceil($preLength / $ps_length));
      $pre = (string)self::substr($pre, 0, $preLength);
    }

    $post = '';
    if ($postLength > 0) {
      $post = str_repeat($pad_string, (int)ceil($postLength / $ps_length));
      $post = (string)self::substr($post, 0, $postLength);
    }

    return $pre . $str . $post;
  }
4683
4684
  /**
   * Repeat a string.
   *
   * The input is passed through "UTF8::filter()" before it is repeated.
   *
   * @param string $str        <p>
   *                           The string to be repeated.
   *                           </p>
   * @param int    $multiplier <p>
   *                           Number of time the input string should be
   *                           repeated.
   *                           </p>
   *                           <p>
   *                           multiplier has to be greater than or equal to 0.
   *                           If the multiplier is set to 0, the function
   *                           will return an empty string.
   *                           </p>
   *
   * @return string <p>The repeated string.</p>
   */
  public static function str_repeat($str, $multiplier)
  {
    $filtered = self::filter($str);
    $repeated = str_repeat($filtered, $multiplier);

    return $repeated;
  }
4708
4709
  /**
   * INFO: This is only a wrapper for "str_replace()"  -> the original functions is already UTF-8 safe.
   *
   * Replace all occurrences of the search string with the replacement string
   *
   * @link http://php.net/manual/en/function.str-replace.php
   *
   * @param mixed $search  <p>
   *                       The value being searched for, otherwise known as the needle.
   *                       An array may be used to designate multiple needles.
   *                       </p>
   * @param mixed $replace <p>
   *                       The replacement value that replaces found search
   *                       values. An array may be used to designate multiple replacements.
   *                       </p>
   * @param mixed $subject <p>
   *                       The string or array being searched and replaced on,
   *                       otherwise known as the haystack.
   *                       </p>
   *                       <p>
   *                       If subject is an array, then the search and
   *                       replace is performed with every entry of
   *                       subject, and the return value is an array as
   *                       well.
   *                       </p>
   * @param int   $count   [optional] If passed, this will hold the number of matched and replaced needles.
   *
   * @return mixed <p>This function returns a string or an array with the replaced values.</p>
   */
  public static function str_replace($search, $replace, $subject, &$count = null)
  {
    // plain delegation to the native function, "$count" is filled by reference
    $replaced = str_replace($search, $replace, $subject, $count);

    return $replaced;
  }
4742
4743 1
  /**
   * Replace the first "$search"-term with the "$replace"-term.
   *
   * @param string $search  <p>The term to search for.</p>
   * @param string $replace <p>The replacement.</p>
   * @param string $subject <p>The string to search in.</p>
   *
   * @return string <p>The unchanged subject, if the search-term was not found.</p>
   */
  public static function str_replace_first($search, $replace, $subject)
  {
    $position = self::strpos($subject, $search);

    // not found, nothing to replace
    if ($position === false) {
      return $subject;
    }

    return self::substr_replace($subject, $replace, $position, self::strlen($search));
  }
4762 22
4763
  /**
   * Shuffles all the characters in the string.
   *
   * @param string $str <p>The input string</p>
   *
   * @return string <p>The shuffled string.</p>
   */
  public static function str_shuffle($str)
  {
    // split into single characters, so multi-byte characters stay intact
    $characters = self::split($str);

    shuffle($characters);

    $shuffled = implode('', $characters);

    return $shuffled;
  }
4778 21
4779 21
  /**
   * Sort all characters according to code points.
   *
   * @param string $str    <p>A UTF-8 string.</p>
   * @param bool   $unique <p>Sort unique. If <strong>true</strong>, repeated characters are ignored.</p>
   * @param bool   $desc   <p>If <strong>true</strong>, will sort characters in reverse code point order.</p>
   *
   * @return string <p>String of sorted characters.</p>
   */
  public static function str_sort($str, $unique = false, $desc = false)
  {
    $codePoints = self::codepoints($str);

    if ($unique) {
      // flipping twice removes duplicated values
      $flipped = array_flip($codePoints);
      $codePoints = array_flip($flipped);
    }

    if (!$desc) {
      asort($codePoints);
    } else {
      arsort($codePoints);
    }

    // back from code points to a string
    return self::string($codePoints);
  }
4804
4805 2
  /**
   * Split a string into an array of grapheme clusters.
   *
   * @param string $str <p>The input string.</p>
   * @param int    $len <p>Number of grapheme clusters per array element.</p>
   *
   * @return array <p>An empty array for an empty string.</p>
   */
  public static function str_split($str, $len = 1)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return array();
    }

    $len = (int)$len;

    // an invalid length is handed over to the native function
    if ($len < 1) {
      return str_split($str, $len);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    preg_match_all('/' . self::GRAPHEME_CLUSTER_RX . '/u', $str, $matches);
    $graphemes = $matches[0];

    if ($len === 1) {
      return $graphemes;
    }

    // glue "$len" grapheme clusters together per element
    $chunks = array();
    $chunkIndex = -1;

    foreach ($graphemes as $position => $grapheme) {
      if ($position % $len) {
        // continue the current element
        $chunks[$chunkIndex] .= $grapheme;
      } else {
        // start the next element
        $chunks[++$chunkIndex] = $grapheme;
      }
    }

    return $chunks;
  }
4849 10
4850
  /**
   * Check if the string starts with the given substring.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The substring to search for.</p>
   *
   * @return bool <p><strong>false</strong> is also returned if one of the strings is empty.</p>
   */
  public static function str_starts_with($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // a match at the very first position is required
    return self::strpos($haystack, $needle) === 0;
  }
4873
4874
  /**
   * Get a binary representation of a specific string.
   *
   * The bytes of the string are read as one big number, so leading zero-bits are
   * not part of the result. An empty string results in "0".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>A string of "0" and "1".</p>
   */
  public static function str_to_binary($str)
  {
    // every hex-digit is exactly four bits
    static $HEX_TO_BITS = array(
        '0' => '0000',
        '1' => '0001',
        '2' => '0010',
        '3' => '0011',
        '4' => '0100',
        '5' => '0101',
        '6' => '0110',
        '7' => '0111',
        '8' => '1000',
        '9' => '1001',
        'a' => '1010',
        'b' => '1011',
        'c' => '1100',
        'd' => '1101',
        'e' => '1110',
        'f' => '1111',
    );

    $str = (string)$str;

    $value = unpack('H*', $str);

    // The digits are translated one by one, because "base_convert()" works with
    // floats internally and loses precision as soon as the string has more than a few bytes.
    $bits = ltrim(strtr($value[1], $HEX_TO_BITS), '0');

    return $bits === '' ? '0' : $bits;
  }
4889 1
4890 1
  /**
   * Convert a string into an array of words.
   *
   * The string is split with the words kept as delimiters, so without filtering
   * the result contains the words as well as the parts between them.
   *
   * @param string   $str
   * @param string   $charList          <p>Additional chars for the definition of "words".</p>
   * @param bool     $removeEmptyValues <p>Remove empty values.</p>
   * @param null|int $removeShortValues <p>Remove values up to this length, <strong>null</strong> disables the filter.</p>
   *
   * @return array
   */
  public static function str_to_words($str, $charList = '', $removeEmptyValues = false, $removeShortValues = null)
  {
    $str = (string)$str;

    $minLength = null;
    if ($removeShortValues !== null) {
      $minLength = (int)$removeShortValues;
    }

    if (!isset($str[0])) {
      return $removeEmptyValues === true ? array() : array('');
    }

    // letters plus the additional chars
    $wordChars = self::rxClass($charList, '\pL');

    $parts = \preg_split("/({$wordChars}+(?:[\p{Pd}’']{$wordChars}+)*)/u", $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    // no filter requested
    if ($minLength === null && $removeEmptyValues === false) {
      return $parts;
    }

    $filtered = array();
    foreach ($parts as $part) {
      $isTooShort = $minLength !== null && self::strlen($part) <= $minLength;
      if ($isTooShort) {
        continue;
      }

      $isEmpty = $removeEmptyValues === true && trim($part) === '';
      if ($isEmpty) {
        continue;
      }

      $filtered[] = $part;
    }

    return $filtered;
  }
4951
4952 1
  /**
   * alias for "UTF8::to_ascii()"
   *
   * @see UTF8::to_ascii()
   *
   * @param string $str
   * @param string $unknown
   * @param bool   $strict
   *
   * @return string
   */
  public static function str_transliterate($str, $unknown = '?', $strict = false)
  {
    // all parameters are passed through unchanged
    $ascii = self::to_ascii($str, $unknown, $strict);

    return $ascii;
  }
4967
4968
  /**
   * Counts number of words in the UTF-8 string.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $format   [optional] <p>
   *                         <strong>0</strong> => return a number of words (default)<br>
   *                         <strong>1</strong> => return an array of words<br>
   *                         <strong>2</strong> => return an array of words with word-offset as key
   *                         </p>
   * @param string $charlist [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   *
   * @return array|int <p>The number of words in the string</p>
   */
  public static function str_word_count($str, $format = 0, $charlist = '')
  {
    // the words are at the odd positions, the parts between them at the even positions
    $parts = self::str_to_words($str, $charlist);
    $partsCount = count($parts);

    if ($format === 1) {
      $words = array();
      for ($index = 1; $index < $partsCount; $index += 2) {
        $words[] = $parts[$index];
      }

      return $words;
    }

    if ($format === 2) {
      $words = array();
      $wordOffset = self::strlen($parts[0]);
      for ($index = 1; $index < $partsCount; $index += 2) {
        $words[$wordOffset] = $parts[$index];
        // skip the word and the part behind it
        $wordOffset += self::strlen($parts[$index]) + self::strlen($parts[$index + 1]);
      }

      return $words;
    }

    return ($partsCount - 1) / 2;
  }
5011 14
5012 13
  /**
   * Case-insensitive string comparison.
   *
   * INFO: Case-insensitive version of UTF8::strcmp()
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function strcasecmp($str1, $str2)
  {
    // compare the case-folded versions of both strings
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strcmp($folded1, $folded2);
  }
5030 1
5031
  /**
   * alias for "UTF8::strstr()"
   *
   * @see UTF8::strstr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all parameters are passed through unchanged
    $found = self::strstr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5048 13
5049
  /**
   * Case-sensitive string comparison.
   *
   * Strings that are not identical are compared in their NFD-normalized form.
   *
   * @param string $str1
   * @param string $str2
   *
   * @return int  <p>
   *              <strong>&lt; 0</strong> if str1 is less than str2<br>
   *              <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *              <strong>0</strong> if they are equal.
   *              </p>
   */
  public static function strcmp($str1, $str2)
  {
    // identical strings do not need to be normalized
    if ($str1 . '' === $str2 . '') {
      return 0;
    }

    /** @noinspection PhpUndefinedClassInspection */
    $normalized1 = \Normalizer::normalize($str1, \Normalizer::NFD);
    /** @noinspection PhpUndefinedClassInspection */
    $normalized2 = \Normalizer::normalize($str2, \Normalizer::NFD);

    return strcmp($normalized1, $normalized2);
  }
5069 1
5070
  /**
   * Find length of initial segment not matching mask.
   *
   * @param string $str      <p>The input string.</p>
   * @param string $charList <p>The characters that end the segment.</p>
   * @param int    $offset   <p>Start of the part of the string that is examined.</p>
   * @param int    $length   <p>Length of the part of the string that is examined.</p>
   *
   * @return int|null <p><strong>null</strong> for an empty char-list or an empty (sub-)string.</p>
   */
  public static function strcspn($str, $charList, $offset = 0, $length = null)
  {
    $charList = (string)$charList;
    if ($charList === '') {
      return null;
    }

    // only look at the requested part of the string
    if ($offset || $length !== null) {
      $part = self::substr($str, $offset, $length);
      if ($part === false) {
        return null;
      }
      $str = (string)$part;
    }

    $str = (string)$str;
    if (!isset($str[0])) {
      return null;
    }

    // capture everything in front of the first char from the list
    $pattern = '/^(.*?)' . self::rxClass($charList) . '/us';
    if (preg_match($pattern, $str, $matches)) {
      /** @noinspection OffsetOperationsInspection */
      return self::strlen($matches[1]);
    }

    // no char from the list was found
    return self::strlen($str);
  }
5106 3
5107
  /**
   * alias for "UTF8::stristr()"
   *
   * @see UTF8::stristr()
   *
   * @param string  $haystack
   * @param string  $needle
   * @param bool    $before_needle
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string|false
   */
  public static function strichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // all parameters are passed through unchanged
    $found = self::stristr($haystack, $needle, $before_needle, $encoding, $cleanUtf8);

    return $found;
  }
5124
5125
  /**
   * Create a UTF-8 string from code points.
   *
   * INFO: opposite to UTF8::codepoints()
   *
   * @param array $array <p>Integer or Hexadecimal codepoints.</p>
   *
   * @return string <p>UTF-8 encoded string.</p>
   */
  public static function string(array $array)
  {
    $string = '';

    // convert one code point after the other and glue the characters together
    foreach ($array as $codePoint) {
      $string .= self::chr($codePoint);
    }

    return $string;
  }
5147
5148
  /**
   * Checks if string starts with "BOM" (Byte Order Mark Character) character.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return bool <p><strong>true</strong> if the string has BOM at the start, <strong>false</strong> otherwise.</p>
   */
  public static function string_has_bom($str)
  {
    // the BOM-strings are the keys of the internal BOM-table
    $bomStrings = array_keys(self::$BOM);

    foreach ($bomStrings as $bomString) {
      if (strpos($str, $bomString) === 0) {
        return true;
      }
    }

    return false;
  }
5165
5166 10
  /**
   * Strip HTML and PHP tags from a string, optionally cleaning invalid UTF-8 first.
   *
   * @link http://php.net/manual/en/function.strip-tags.php
   *
   * @param string  $str            <p>The input string.</p>
   * @param string  $allowable_tags [optional] <p>
   *                                Tags which should not be stripped; passed as-is to the native
   *                                strip_tags() function.
   *                                </p>
   *                                <p>
   *                                HTML comments and PHP tags are also stripped. This is hardcoded and
   *                                can not be changed with allowable_tags.
   *                                </p>
   * @param boolean $cleanUtf8      [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string <p>The stripped string.</p>
   */
  public static function strip_tags($str, $allowable_tags = null, $cleanUtf8 = false)
  {
    $input = (string)$str;

    // nothing to strip in an empty string
    if ($input === '') {
      return '';
    }

    // the optional clean-up runs before the tags are removed
    return strip_tags(
        $cleanUtf8 === true ? self::clean($input) : $input,
        $allowable_tags
    );
  }
5200 9
5201 9
  /**
   * Finds position of first occurrence of a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-stripos.php
   *
   * @param string  $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string  $needle    <p>The string to find in haystack.</p>
   * @param int     $offset    [optional] <p>The position in haystack to start searching.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string,<br>
   *                   or false if needle is not found (also for an empty haystack or needle).
   *                   </p>
   */
  public static function stripos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the reported position
      $haystack = self::clean($haystack);
      $needle = self::clean($needle);
    }

    // INFO: a "bool" encoding is only accepted as a fallback for old versions
    if ($encoding !== 'UTF-8') {
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // INFO: "grapheme_stripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_stripos($haystack, $needle, $offset);
    }

    // fallback to "mb_"-function via polyfill
    return \mb_stripos($haystack, $needle, $offset, $encoding);
  }
5261 11
5262
  /**
   * Returns all of haystack starting from and including the first occurrence of needle to the end
   * (case insensitive).
   *
   * @param string  $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string  $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool    $before_needle [optional] <p>
   *                               If <b>TRUE</b>, the part of the haystack before the first
   *                               occurrence of the needle (excluding the needle) is returned.
   *                               </p>
   * @param string  $encoding      [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return false|string A sub-string,<br>or <strong>false</strong> if needle is not found
   *                      (also for an empty haystack or needle).
   */
  public static function stristr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $before_needle = (bool)$before_needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // The clean-up above may have emptied the needle. Test for the empty string
    // explicitly: a loose "!$needle" would also match the valid needle "0".
    if (!isset($needle[0])) {
      return $haystack;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::stristr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_stristr($haystack, $needle, $before_needle, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_stristr()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_stristr($haystack, $needle, $before_needle);
    }

    // pure ASCII input can be handled by the native byte-wise function
    if (self::is_ascii($haystack) && self::is_ascii($needle)) {
      return stristr($haystack, $needle, $before_needle);
    }

    // fallback via vanilla php: capture everything in front of the first match
    preg_match('/^(.*?)' . preg_quote($needle, '/') . '/usi', $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    if ($before_needle) {
      return $match[1];
    }

    // skip as many characters as were captured in front of the needle
    return self::substr($haystack, self::strlen($match[1]));
  }
5343 2
5344
  /**
   * Get the string length, not the byte-length!
   *
   * @link     http://php.net/manual/en/function.mb-strlen.php
   *
   * @param string  $str       <p>The string being checked for length.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int <p>The number of characters in the string $str having character encoding $encoding. (One multi-byte
   *             character counted as +1)</p>
   */
  public static function strlen($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return 0;
    }

    // INFO: a "bool" encoding is only accepted as a fallback for old versions
    if ($encoding !== 'UTF-8') {
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // these encodings are measured byte-wise
    // (the loose comparison is intentional, it behaves like a "switch" on $encoding)
    if (in_array($encoding, array('ASCII', 'CP850', '8BIT'))) {
      if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
        return strlen($str);
      }

      return \mb_strlen($str, '8BIT');
    }

    if ($cleanUtf8 === true) {
      // "\mb_strlen" and "\iconv_strlen" returns wrong length,
      // if invalid characters are found in $str
      $str = self::clean($str);
    }

    // neither extension is available for a non UTF-8 charset: warn, then try the fallbacks
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === false
    ) {
      trigger_error('UTF8::strlen() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // mbstring is preferred, iconv is used whenever mbstring is not available
    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strlen($str, $encoding);
    }

    if (self::$SUPPORT['iconv'] === true) {
      return \iconv_strlen($str, $encoding);
    }

    // INFO: "grapheme_strlen()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strlen($str);
    }

    if (self::is_ascii($str)) {
      return strlen($str);
    }

    // fallback via vanilla php: count the matches of "any single character"
    preg_match_all('/./us', $str, $parts);
    $charCount = count($parts[0]);
    if ($charCount !== 0) {
      return $charCount;
    }

    // fallback to "mb_"-function via polyfill
    return \mb_strlen($str, $encoding);
  }
5451
5452
  /**
   * Case insensitive string comparisons using a "natural order" algorithm.
   *
   * INFO: natural order version of UTF8::strcasecmp()
   *
   * Both strings are case-folded first and then handed to UTF8::strnatcmp().
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcasecmp($str1, $str2)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strnatcmp($folded1, $folded2);
  }
5468 2
5469
  /**
   * String comparisons using a "natural order" algorithm
   *
   * INFO: natural order version of UTF8::strcmp()
   *
   * @link  http://php.net/manual/en/function.strnatcmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   *
   * @return int <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strnatcmp($str1, $str2)
  {
    // identical strings need no folding at all
    if ((string)$str1 === (string)$str2) {
      return 0;
    }

    return strnatcmp(self::strtonatfold($str1), self::strtonatfold($str2));
  }
5487
5488 1
  /**
   * Case-insensitive string comparison of the first n characters.
   *
   * Both strings are case-folded first and then handed to UTF8::strncmp().
   *
   * @link  http://php.net/manual/en/function.strncasecmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>The length of strings to be used in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncasecmp($str1, $str2, $len)
  {
    $folded1 = self::strtocasefold($str1);
    $folded2 = self::strtocasefold($str2);

    return self::strncmp($folded1, $folded2, $len);
  }
5505
5506
  /**
   * String comparison of the first n characters.
   *
   * Both inputs are cut down via UTF8::substr() and then compared via UTF8::strcmp().
   *
   * @link  http://php.net/manual/en/function.strncmp.php
   *
   * @param string $str1 <p>The first string.</p>
   * @param string $str2 <p>The second string.</p>
   * @param int    $len  <p>Number of characters to use in the comparison.</p>
   *
   * @return int <strong>&lt; 0</strong> if <i>str1</i> is less than <i>str2</i>;<br>
   *             <strong>&gt; 0</strong> if <i>str1</i> is greater than <i>str2</i>;<br>
   *             <strong>0</strong> if they are equal
   */
  public static function strncmp($str1, $str2, $len)
  {
    // the string cast turns a failed substr() into an empty string
    return self::strcmp(
        (string)self::substr($str1, 0, $len),
        (string)self::substr($str2, 0, $len)
    );
  }
5526
5527
  /**
   * Search a string for any of a set of characters.
   *
   * @link  http://php.net/manual/en/function.strpbrk.php
   *
   * @param string $haystack  <p>The string where char_list is looked for.</p>
   * @param string $char_list <p>This parameter is case sensitive.</p>
   *
   * @return string|false String starting from the character found, or false if it is not found
   *                      (also for an empty haystack or char_list).
   */
  public static function strpbrk($haystack, $char_list)
  {
    $haystack = (string)$haystack;
    $char_list = (string)$char_list;

    if ($haystack === '' || $char_list === '') {
      return false;
    }

    // match the first character which is part of the character class
    $pattern = '/' . self::rxClass($char_list) . '/us';
    if (!preg_match($pattern, $haystack, $m)) {
      return false;
    }

    // cut the haystack at the byte position of the matched character
    $bytePos = strpos($haystack, $m[0]);

    return substr($haystack, $bytePos);
  }
5552
5553
  /**
   * Find position of first occurrence of string in a string.
   *
   * @link http://php.net/manual/en/function.mb-strpos.php
   *
   * @param string     $haystack  <p>The string from which to get the position of the first occurrence of needle.</p>
   * @param string|int $needle    <p>The string to find in haystack.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>The search offset. If it is not specified, 0 is used.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the first occurrence of needle in the haystack string.<br>
   *                   If needle is not found (or haystack / needle is empty) it returns false.
   *                   </p>
   */
  public static function strpos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // iconv and mbstring do not support integer $needle: a non-negative integer
    // is converted into its character. This has to happen before the string cast
    // below, otherwise the type check can never be true.
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // init
    $offset = (int)$offset;

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (
        $encoding === 'UTF-8'
        ||
        $encoding === true || $encoding === false // INFO: the "bool"-check is only a fallback for old versions
    ) {
      $encoding = 'UTF-8';
    } else {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (
        $encoding === 'CP850'
        &&
        self::$SUPPORT['mbstring_func_overload'] === false
    ) {
      return strpos($haystack, $needle, $offset);
    }

    // warn only if no extension is able to handle the requested charset
    if (
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['iconv'] === false
        &&
        self::$SUPPORT['mbstring'] === false
    ) {
      trigger_error('UTF8::strpos() without mbstring / iconv cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        $encoding !== 'UTF-8'
        &&
        self::$SUPPORT['mbstring'] === false
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strpos($haystack, $needle, $offset, $encoding);
    }

    if (
        $encoding === 'UTF-8' // INFO: "grapheme_strpos()" can't handle other encodings
        &&
        self::$SUPPORT['intl'] === true
        &&
        Bootup::is_php('5.4') === true
    ) {
      return \grapheme_strpos($haystack, $needle, $offset);
    }

    if (
        $offset >= 0 // iconv_strpos() can't handle negative offset
        &&
        self::$SUPPORT['iconv'] === true
    ) {
      return \iconv_strpos($haystack, $needle, $offset, $encoding);
    }

    $haystackIsAscii = self::is_ascii($haystack);
    if ($haystackIsAscii && self::is_ascii($needle)) {
      return strpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php

    if ($offset < 0) {
      // Count a negative offset from the end of the string, so that the returned
      // position stays relative to the start of $haystack. An offset which is out
      // of range is ignored (the search starts at the beginning).
      $offset += self::strlen($haystack);
      if ($offset < 0) {
        $offset = 0;
      }
    }

    // drop the first $offset characters ...
    if ($haystackIsAscii) {
      $haystackTmp = substr($haystack, $offset);
    } else {
      $haystackTmp = self::substr($haystack, $offset);
    }
    if ($haystackTmp === false) {
      $haystackTmp = '';
    }
    $haystackTmp = (string)$haystackTmp;

    // ... search byte-wise in the rest ...
    $pos = strpos($haystackTmp, $needle);
    if ($pos === false) {
      return false;
    }

    // ... and convert the byte position back into a character position
    return $offset + self::strlen(substr($haystackTmp, 0, $pos));
  }
5698
5699
  /**
   * Finds the last occurrence of a character in a string within another.
   *
   * @link http://php.net/manual/en/function.mb-strrchr.php
   *
   * @param string $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string $needle        <p>The string to find in haystack</p>
   * @param bool   $before_needle [optional] <p>
   *                              Determines which portion of haystack this function returns.
   *                              If set to true, it returns all of haystack
   *                              from the beginning to the last occurrence of needle.
   *                              If set to false, it returns all of haystack
   *                              from the last occurrence of needle to the end,
   *                              </p>
   * @param string $encoding      [optional] <p>Character encoding name to use.</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false The portion of haystack or false if needle is not found.
   */
  public static function strrchr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // anything but the default charset gets normalized first
    $encoding = $encoding === 'UTF-8' ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // fallback to "mb_"-function via polyfill
    $portion = \mb_strrchr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5738
5739 1
  /**
   * Reverses characters order in the string.
   *
   * The string is split via UTF8::split(), the resulting pieces are reversed and joined again.
   *
   * @param string $str The input string
   *
   * @return string The string with characters in the reverse sequence
   */
  public static function strrev($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $chars = self::split($str);

    return implode('', array_reverse($chars));
  }
5756 1
5757
  /**
   * Finds the last occurrence of a character in a string within another, case insensitive.
   *
   * @link http://php.net/manual/en/function.mb-strrichr.php
   *
   * @param string  $haystack      <p>The string from which to get the last occurrence of needle.</p>
   * @param string  $needle        <p>The string to find in haystack.</p>
   * @param bool    $before_needle [optional] <p>
   *                               Determines which portion of haystack this function returns.
   *                               If set to true, it returns all of haystack
   *                               from the beginning to the last occurrence of needle.
   *                               If set to false, it returns all of haystack
   *                               from the last occurrence of needle to the end,
   *                               </p>
   * @param string  $encoding      [optional] <p>Character encoding name to use.</p>
   * @param boolean $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of haystack or<br>false if needle is not found.</p>
   */
  public static function strrichr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // anything but the default charset gets normalized first
    $encoding = $encoding === 'UTF-8' ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // "\mb_strpos" and "\iconv_strpos" returns wrong position,
      // if invalid characters are found in $haystack before $needle
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $portion = \mb_strrichr($haystack, $needle, $before_needle, $encoding);

    return $portion;
  }
5795
5796
  /**
   * Find position of last occurrence of a case-insensitive string.
   *
   * @param string     $haystack  <p>The string to look in.</p>
   * @param string|int $needle    <p>The string to look for.<br>Or a code point as int.</p>
   * @param int        $offset    [optional] <p>Number of characters to ignore in the beginning or end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param boolean    $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>
   *                   The numeric position of the last occurrence of needle in the haystack string.<br>If needle is
   *                   not found (or haystack / needle is empty), it returns false.
   *                   </p>
   */
  public static function strripos($haystack, $needle, $offset = 0, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a non-negative integer needle is converted into its character
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    // init
    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if ($haystack === '' || $needle === '') {
      return false;
    }

    // INFO: the "bool"-check is only a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strripos && iconv_strripos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    // INFO: a "bool" encoding is only accepted as a fallback for old versions
    if ($encoding !== 'UTF-8') {
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strripos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strripos($haystack, $needle, $offset, $encoding);
    }

    // INFO: "grapheme_strripos()" can't handle other encodings
    $useIntl = $encoding === 'UTF-8'
               &&
               self::$SUPPORT['intl'] === true
               &&
               Bootup::is_php('5.4') === true;

    if ($useIntl) {
      return \grapheme_strripos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: compare the upper-cased variants case-sensitively
    $upperHaystack = self::strtoupper($haystack);
    $upperNeedle = self::strtoupper($needle);

    return self::strrpos($upperHaystack, $upperNeedle, $offset, $encoding, $cleanUtf8);
  }
5876
5877
  /**
   * Locate the last occurrence of $needle inside $haystack.
   *
   * Dispatch order: "mb_strrpos()" when mbstring is available, then "grapheme_strrpos()" (UTF-8 + intl + PHP >= 5.4),
   * and finally a byte-wise "strrpos()" whose byte offset is converted into a character position.
   *
   * @link http://php.net/manual/en/function.mb-strrpos.php
   *
   * @param string     $haystack  <p>The string to search in.</p>
   * @param string|int $needle    <p>The string to search for.<br>A non-negative int is converted via UTF8::chr().</p>
   * @param int        $offset    [optional] <p>Positive: skip that many characters from the start.<br>
   *                              Negative: stop searching that many characters before the end.</p>
   * @param string     $encoding  [optional] <p>Set the charset.</p>
   * @param bool       $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>Position of the last occurrence of needle,<br>or <strong>false</strong> if needle is
   *                   not found or one of the strings is empty.</p>
   */
  public static function strrpos($haystack, $needle, $offset = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    // a real (non-negative) integer needle is treated as a code point
    if ((int)$needle === $needle && $needle >= 0) {
      $needle = (string)self::chr($needle);
    }

    $haystack = (string)$haystack;
    $needle = (string)$needle;
    $offset = (int)$offset;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    // INFO: a boolean "$encoding" is only accepted as a fallback for old versions
    if ($cleanUtf8 === true || $encoding === true) {
      // \mb_strrpos && iconv_strrpos is not tolerant to invalid characters
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strrpos($haystack, $needle, $offset, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strrpos() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   &&
                   self::$SUPPORT['intl'] === true
                   &&
                   Bootup::is_php('5.4') === true;
    if ($useGrapheme) {
      return \grapheme_strrpos($haystack, $needle, $offset);
    }

    // fallback via vanilla php: narrow the haystack according to "$offset" first
    // ("(string)false" is "", so a failed substr() degrades to an empty haystack)
    if ($offset > 0) {
      $haystack = (string)self::substr($haystack, $offset);
    } elseif ($offset < 0) {
      $haystack = (string)self::substr($haystack, 0, $offset);
      $offset = 0;
    }

    $bytePos = strrpos($haystack, $needle);
    if ($bytePos === false) {
      return false;
    }

    // convert the byte position into a character position
    $leadingPart = substr($haystack, 0, $bytePos);

    return $offset + self::strlen($leadingPart);
  }
5979
5980
  /**
   * Length of the initial segment of $str that consists only of characters from $mask.
   *
   * If $offset or $length is given, the check runs on that sub-string (cut via UTF8::substr()).
   *
   * @param string $str    <p>The input string.</p>
   * @param string $mask   <p>The mask of chars</p>
   * @param int    $offset [optional]
   * @param int    $length [optional]
   *
   * @return int <p>0 if the string or the mask is empty, or if the string does not start with a mask char.</p>
   */
  public static function strspn($str, $mask, $offset = 0, $length = null)
  {
    if ($offset || $length !== null) {
      // "(string)false" is "", so a failed substr() degrades to an empty string
      $str = (string)self::substr($str, $offset, $length);
    }

    $str = (string)$str;
    if (!isset($str[0], $mask[0])) {
      return 0;
    }

    $pattern = '/^' . self::rxClass($mask) . '+/u';
    if (preg_match($pattern, $str, $matches)) {
      return self::strlen($matches[0]);
    }

    return 0;
  }
6008
6009
  /**
   * Return the part of $haystack starting at the first occurrence of $needle (or the part before it).
   *
   * Dispatch order: "mb_strstr()" when mbstring is available, then "grapheme_strstr()" (UTF-8 + intl + PHP >= 5.4),
   * and finally a regex based fallback.
   *
   * @param string $haystack      <p>The input string. Must be valid UTF-8.</p>
   * @param string $needle        <p>The string to look for. Must be valid UTF-8.</p>
   * @param bool   $before_needle [optional] <p>
   *                              If <b>TRUE</b>, the part of the haystack before the first occurrence of the
   *                              needle is returned (excluding the needle).
   *                              </p>
   * @param string $encoding      [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8     [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false A sub-string,<br>or <strong>false</strong> if needle is not found or one of the
   *                      strings is empty.
   */
  public static function strstr($haystack, $needle, $before_needle = false, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($cleanUtf8 === true) {
      // invalid characters in front of the needle would shift the reported position
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_strstr($haystack, $needle, $before_needle, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::strstr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   &&
                   self::$SUPPORT['intl'] === true
                   &&
                   Bootup::is_php('5.4') === true;
    if ($useGrapheme) {
      return \grapheme_strstr($haystack, $needle, $before_needle);
    }

    // fallback: capture (non-greedy) everything in front of the first needle
    $pattern = '/^(.*?)' . preg_quote($needle, '/') . '/us';
    preg_match($pattern, $haystack, $match);

    if (!isset($match[1])) {
      return false;
    }

    $prefix = $match[1];
    if ($before_needle) {
      return $prefix;
    }

    return self::substr($haystack, self::strlen($prefix));
  }
6081
6082
  /**
   * Unicode transformation for case-less matching.
   *
   * Applies the replacements from UTF8::$COMMON_CASE_FOLD, optionally the "caseFolding_full" data table, and
   * lower-cases the result via UTF8::strtolower().
   *
   * @link http://unicode.org/reports/tr21/tr21-5.html
   *
   * @param string $str       <p>The input string.</p>
   * @param bool   $full      [optional] <p>
   *                          <b>true</b>, replace full case folding chars (default)<br>
   *                          <b>false</b>, use only limited static array [UTF8::$COMMON_CASE_FOLD]
   *                          </p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string
   */
  public static function strtocasefold($str, $full = true, $cleanUtf8 = false)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // search / replace lists of the common table, built once per request
    static $commonFoldSearch = null;
    static $commonFoldReplace = null;

    if ($commonFoldSearch === null) {
      $commonFoldSearch = array_keys(self::$COMMON_CASE_FOLD);
      $commonFoldReplace = array_values(self::$COMMON_CASE_FOLD);
    }

    $str = (string)str_replace($commonFoldSearch, $commonFoldReplace, $str);

    if ($full) {

      // full folding table, loaded lazily on first use
      static $fullFoldTable = null;

      if ($fullFoldTable === null) {
        $fullFoldTable = self::getData('caseFolding_full');
      }

      /** @noinspection OffsetOperationsInspection */
      $str = (string)str_replace($fullFoldTable[0], $fullFoldTable[1], $str);
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return self::strtolower($str);
  }
6133
6134
  /**
   * Make a string lowercase.
   *
   * Without "$lang" the work is done by "mb_strtolower()". With "$lang" the intl transliterator "<lang>-Lower"
   * is used (falling back to "Any-Lower" with a warning if that id is not available); if intl or PHP >= 5.4 is
   * missing, a warning is triggered and "mb_strtolower()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtolower.php
   *
   * @param string      $str       <p>The string being lowercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset for e.g. "\mb_" function</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to lowercase.
   */
  public static function strtolower($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before handing the string to mbstring / intl
      $str = self::clean($str);
    }

    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // common case: no language specific rules requested
    if ($lang === null) {
      return \mb_strtolower($str, $encoding);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['intl'] !== true || Bootup::is_php('5.4') !== true) {
      trigger_error('UTF8::strtolower() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);

      return \mb_strtolower($str, $encoding);
    }

    $transliteratorId = $lang . '-Lower';
    $knownIds = self::$SUPPORT['intl__transliterator_list_ids'];
    if (!in_array($transliteratorId, $knownIds, true)) {
      trigger_error('UTF8::strtolower() without intl for special language: ' . $lang, E_USER_WARNING);

      // generic transliterator as a fallback
      $transliteratorId = 'Any-Lower';
    }

    return transliterator_transliterate($transliteratorId, $str);
  }
6191
6192
  /**
   * Folding for collation matching: decompose the string (NFD) and strip all non-spacing marks (\p{Mn}).
   *
   * @param string $str <p>The input string</p>
   *
   * @return string
   */
  private static function strtonatfold($str)
  {
    /** @noinspection PhpUndefinedClassInspection */
    $decomposed = \Normalizer::normalize($str, \Normalizer::NFD);

    return preg_replace('/\p{Mn}+/u', '', $decomposed);
  }
6204
6205 17
  /**
   * Make a string uppercase.
   *
   * Without "$lang" the work is done by "mb_strtoupper()". With "$lang" the intl transliterator "<lang>-Upper"
   * is used (falling back to "Any-Upper" with a warning if that id is not available); if intl or PHP >= 5.4 is
   * missing, a warning is triggered and "mb_strtoupper()" is used.
   *
   * @link http://php.net/manual/en/function.mb-strtoupper.php
   *
   * @param string      $str       <p>The string being uppercased.</p>
   * @param string      $encoding  [optional] <p>Set the charset.</p>
   * @param bool        $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   * @param string|null $lang      [optional] <p>Set the language for special cases: az, el, lt, tr</p>
   *
   * @return string str with all alphabetic characters converted to uppercase.
   */
  public static function strtoupper($str, $encoding = 'UTF-8', $cleanUtf8 = false, $lang = null)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // drop invalid byte sequences before handing the string to mbstring / intl
      $str = self::clean($str);
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($lang !== null) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        $langCode = $lang . '-Upper';
        if (!in_array($langCode, self::$SUPPORT['intl__transliterator_list_ids'], true)) {
          trigger_error('UTF8::strtoupper() without intl for special language: ' . $lang, E_USER_WARNING);

          // generic transliterator as a fallback
          $langCode = 'Any-Upper';
        }

        return transliterator_transliterate($langCode, $str);
      }

      // the warning has to name this method (it used to say "strtolower()")
      trigger_error('UTF8::strtoupper() without intl + PHP >= 5.4 cannot handle the "lang"-parameter: ' . $lang, E_USER_WARNING);
    }

    return \mb_strtoupper($str, $encoding);
  }
6261
6262
  /**
   * Translate characters or replace sub-strings.
   *
   * Three call forms are handled:
   * - strtr($str, $from, $to): strings are split into characters (UTF8::str_split()), arrays are used as given;
   *   the longer list is truncated to the length of the shorter one and each "from" item is replaced by the
   *   "to" item at the same position.
   * - strtr($str, array $pairs): behaves like the native two-argument "strtr()".
   * - strtr($str, string $from): every occurrence of "$from" is removed.
   *
   * @link  http://php.net/manual/en/function.strtr.php
   *
   * @param string          $str  <p>The string being translated.</p>
   * @param string|string[] $from <p>The string replacing from.</p>
   * @param string|string[] $to   <p>The string being translated to.<br>
   *                              INF (the default) means "not given".</p>
   *
   * @return string <p>
   *                A copy of str in which all occurrences of each item in from are replaced by the
   *                corresponding item in to.
   *                </p>
   */
  public static function strtr($str, $from, $to = INF)
  {
    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    if ($from === $to) {
      return $str;
    }

    if (INF !== $to) {
      // only plain strings are split into characters; arrays are already
      // lists of items and must not be handed to str_split()
      if (!is_array($from)) {
        $from = self::str_split($from);
      }
      if (!is_array($to)) {
        $to = self::str_split($to);
      }

      // pair the items up, ignoring the surplus of the longer list
      $pairCount = min(count($from), count($to));
      if ($pairCount === 0) {
        // nothing to translate (also avoids array_combine() on empty input)
        return $str;
      }

      $from = array_combine(
          array_slice($from, 0, $pairCount),
          array_slice($to, 0, $pairCount)
      );
    }

    if (is_string($from)) {
      // single string without replacement: remove it
      return str_replace($from, '', $str);
    }

    return strtr($str, $from);
  }
6309 1
6310 1
  /**
   * Return the width of a string, as reported by "mb_strwidth()".
   *
   * @param string $str       <p>The input string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int
   */
  public static function strwidth($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    // iconv and mbstring are not tolerant to invalid encoding,
    // so invalid bytes are removed up front when requested
    $input = ($cleanUtf8 === true) ? self::clean($str) : $str;

    // fallback to "mb_"-function via polyfill
    return \mb_strwidth($input, $encoding);
  }
6334 76
6335
  /**
   * Changes the case of all keys in an array (UTF-8 aware).
   *
   * @param array $array <p>The array to work on</p>
   * @param int   $case  [optional] <p> Either <strong>CASE_UPPER</strong><br>
   *                     or <strong>CASE_LOWER</strong> (default); any other value is handled like
   *                     <strong>CASE_UPPER</strong>.</p>
   *
   * @return array|false <p>An array with its keys lower or uppercased, or false if
   *                     input is not an array.</p>
   */
  public static function array_change_key_case($array, $case = CASE_LOWER)
  {
    if (!is_array($array)) {
      return false;
    }

    // everything that is not explicitly CASE_LOWER is upper-cased
    $toLower = ($case === CASE_LOWER);

    $converted = array();
    foreach ($array as $key => $value) {
      $newKey = $toLower ? self::strtolower($key) : self::strtoupper($key);
      $converted[$newKey] = $value;
    }

    return $converted;
  }
6372 59
6373
  /**
   * Get part of a string.
   *
   * Dispatch order: native "substr()" for CP850 (when mbstring function overloading is off), "mb_substr()",
   * "grapheme_substr()" (UTF-8 + intl + PHP >= 5.4), "iconv_substr()" (non-negative length only), native
   * "substr()" for pure ASCII input, and finally a split-into-characters fallback.
   *
   * @link http://php.net/manual/en/function.mb-substr.php
   *
   * @param string $str       <p>The string being checked.</p>
   * @param int    $offset    <p>The first position used in str.</p>
   * @param int    $length    [optional] <p>The maximum length of the returned string.</p>
   * @param string $encoding  [optional] <p>Default is UTF-8</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return string|false <p>The portion of <i>str</i> specified by the <i>offset</i> and
   *                      <i>length</i> parameters.</p><p>An empty input or a length of 0 gives an empty
   *                      string; if <i>offset</i> is greater than the length of <i>str</i>,
   *                      <b>FALSE</b> will be returned.</p>
   */
  public static function substr($str, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // empty input or explicitly empty result
    if (!isset($str[0]) || $length === 0) {
      return '';
    }

    if ($cleanUtf8 === true) {
      // iconv and mbstring are not tolerant to invalid encoding
      // further, their behaviour is inconsistent with that of PHP's substr
      $str = self::clean($str);
    }

    // whole string requested
    if (!$offset && $length === null) {
      return $str;
    }

    // the character count is only needed for the offset check / the default length
    $charCount = ($offset || $length === null) ? (int)self::strlen($str, $encoding) : 0;

    // offset behind the end of the string
    if ($offset && $offset > $charCount) {
      return false;
    }

    // from here on "$length" is always an integer
    $length = ($length === null) ? $charCount : (int)$length;

    // INFO: a boolean "$encoding" is only accepted as a fallback for old versions
    if ($encoding !== 'UTF-8') {
      $encoding = is_bool($encoding) ? 'UTF-8' : self::normalize_encoding($encoding, 'UTF-8');
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if ($encoding === 'CP850' && self::$SUPPORT['mbstring_func_overload'] === false) {
      return substr($str, $offset, $length);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr($str, $offset, $length, $encoding);
    }

    // INFO: the "grapheme_*" functions can't handle other encodings
    $useGrapheme = $encoding === 'UTF-8'
                   &&
                   self::$SUPPORT['intl'] === true
                   &&
                   Bootup::is_php('5.4') === true;
    if ($useGrapheme) {
      return \grapheme_substr($str, $offset, $length);
    }

    // "iconv_substr()" can't handle negative length
    if ($length >= 0 && self::$SUPPORT['iconv'] === true) {
      return \iconv_substr($str, $offset, $length);
    }

    // bytes and characters are the same thing for ASCII
    if (self::is_ascii($str)) {
      return substr($str, $offset, $length);
    }

    // fallback via vanilla php:
    // split into characters (removes invalid characters), slice, and join again
    $chars = self::split($str);

    return implode('', array_slice($chars, $offset, $length));
  }
6495
6496
  /**
   * Compare two strings from an offset, up to length characters.
   *
   * If an offset or a length is given, $str1 is cut accordingly and $str2 is shortened to the length of that
   * cut before both are compared via UTF8::strcmp() / UTF8::strcasecmp().
   *
   * @param string $str1               <p>The main string being compared.</p>
   * @param string $str2               <p>The secondary string being compared.</p>
   * @param int    $offset             [optional] <p>The start position for the comparison. If negative, it starts
   *                                   counting from the end of the string.</p>
   * @param int    $length             [optional] <p>The length of the comparison.</p>
   * @param bool   $case_insensitivity [optional] <p>If case_insensitivity is TRUE, comparison is case
   *                                   insensitive.</p>
   *
   * @return int <p>
   *             <strong>&lt; 0</strong> if str1 is less than str2;<br>
   *             <strong>&gt; 0</strong> if str1 is greater than str2,<br>
   *             <strong>0</strong> if they are equal.
   *             </p>
   */
  public static function substr_compare($str1, $str2, $offset = 0, $length = null, $case_insensitivity = false)
  {
    $needsCut = ($offset !== 0 || $length !== null);

    if ($needsCut) {
      // "(string)false" is "", so a failed substr() degrades to an empty string
      $str1 = (string)self::substr($str1, $offset, $length);
      $str2 = (string)self::substr($str2, 0, self::strlen($str1));
    }

    return ($case_insensitivity === true)
        ? self::strcasecmp($str1, $str2)
        : self::strcmp($str1, $str2);
  }
6540
6541
  /**
   * Count the number of substring occurrences.
   *
   * Uses "mb_substr_count()" when mbstring is available, otherwise a regex based fallback.
   *
   * @link  http://php.net/manual/en/function.substr-count.php
   *
   * @param string $haystack  <p>The string to search in.</p>
   * @param string $needle    <p>The substring to search for.</p>
   * @param int    $offset    [optional] <p>The offset where to start counting.</p>
   * @param int    $length    [optional] <p>The maximum length after the specified offset to search for the
   *                          substring.</p>
   * @param string $encoding  [optional] <p>Set the charset.</p>
   * @param bool   $cleanUtf8 [optional] <p>Remove non UTF-8 chars from the string.</p>
   *
   * @return int|false <p>The number of occurrences,<br>or <strong>false</strong> if haystack or needle is
   *                   empty (or, before PHP 7.1, for an offset / length combination that is out of range).</p>
   */
  public static function substr_count($haystack, $needle, $offset = 0, $length = null, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0], $needle[0])) {
      return false;
    }

    if ($offset || $length !== null) {

      $length = ($length === null) ? (int)self::strlen($haystack) : (int)$length;
      $offset = (int)$offset;

      $outOfRange = $length !== 0
                    &&
                    $offset !== 0
                    &&
                    $length + $offset <= 0
                    &&
                    Bootup::is_php('7.1') === false; // output from "substr_count()" have changed in PHP 7.1
      if ($outOfRange) {
        return false;
      }

      // "(string)false" is "", so a failed substr() degrades to an empty haystack
      $haystack = (string)self::substr($haystack, $offset, $length, $encoding);
    }

    $encoding = ($encoding === 'UTF-8') ? $encoding : self::normalize_encoding($encoding, 'UTF-8');

    if ($cleanUtf8 === true) {
      // invalid characters would distort the matching
      $needle = self::clean($needle);
      $haystack = self::clean($haystack);
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    if (self::$SUPPORT['mbstring'] === true) {
      return \mb_substr_count($haystack, $needle, $encoding);
    }

    if ($encoding !== 'UTF-8' && self::$SUPPORT['mbstring'] === false) {
      trigger_error('UTF8::substr_count() without mbstring cannot handle "' . $encoding . '" encoding', E_USER_WARNING);
    }

    // fallback: count the (non-overlapping) regex matches of the quoted needle
    $pattern = '/' . preg_quote($needle, '/') . '/us';
    preg_match_all($pattern, $haystack, $matches, PREG_SET_ORDER);

    return count($matches);
  }
6630
6631
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with
   *                the needle.</p>
   */
  public static function substr_ileft($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip
    if (!isset($needle[0]) || self::str_istarts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "(string)false" is "", so a failed substr() degrades to an empty string
    return (string)self::substr($haystack, self::strlen($needle));
  }
6663
6664
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack), case insensitive.
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The haystack without the suffix, or the unchanged haystack if it does not end with
   *                the needle.</p>
   */
  public static function substr_iright($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip
    if (!isset($needle[0]) || self::str_iends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    $keepLength = self::strlen($haystack) - self::strlen($needle);

    // "(string)false" is "", so a failed substr() degrades to an empty string
    return (string)self::substr($haystack, 0, $keepLength);
  }
6696
6697
  /**
   * Removes a prefix ($needle) from the start of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The prefix to remove.</p>
   *
   * @return string <p>The haystack without the prefix, or the unchanged haystack if it does not start with
   *                the needle.</p>
   */
  public static function substr_left($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    if (!isset($haystack[0])) {
      return '';
    }

    // nothing to strip
    if (!isset($needle[0]) || self::str_starts_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // "(string)false" is "", so a failed substr() degrades to an empty string
    return (string)self::substr($haystack, self::strlen($needle));
  }
6729 1
6730 1
  /**
   * Replace text within a portion of a string (or of every string in an array).
   *
   * source: https://gist.github.com/stemar/8287074
   *
   * @param string|string[] $str         <p>The input string or an array of strings.</p>
   * @param string|string[] $replacement <p>The replacement string or an array of strings. For a string input only
   *                                     the element at index 0 of an array is used (an empty array means '').</p>
   * @param int|int[]       $offset      <p>
   *                                     If positive, replacing begins at that offset into the string.
   *                                     <br><br>
   *                                     If negative, replacing begins that many characters from the end of the
   *                                     string.
   *                                     <br><br>
   *                                     For array input, non-integer entries of an offset array are treated as 0.
   *                                     </p>
   * @param int|int[]|null  $length      [optional] <p>If positive, the length of the portion to replace. If
   *                                     negative, the number of characters from the end of the string at which to
   *                                     stop replacing. If zero, the replacement is inserted at the offset.
   *                                     <br><br>
   *                                     For a string input, null means "up to the end of the string".
   *                                     <br><br>
   *                                     For array input, null is turned into 0 for every element (i.e. insertion);
   *                                     inside a length array, null entries become 0 and other non-integer entries
   *                                     become the number of input strings.
   *                                     </p>
   *
   * @return string|string[] <p>The resulting string, or an array of results if $str is an array.</p>
   */
  public static function substr_replace($str, $replacement, $offset, $length = null)
  {
    if (is_array($str)) {
      $num = count($str);

      // bring every argument to an array with (at most) one entry per input string

      $replacement = is_array($replacement)
          ? array_slice($replacement, 0, $num)
          : array_pad(array($replacement), $num, $replacement);

      if (is_array($offset)) {
        $offset = array_slice($offset, 0, $num);
        foreach ($offset as $offsetKey => $offsetValue) {
          // only real integers are accepted as offsets
          $offset[$offsetKey] = is_int($offsetValue) ? $offsetValue : 0;
        }
      } else {
        $offset = array_pad(array($offset), $num, $offset);
      }

      if (!isset($length)) {
        // NOTE(review): this makes a missing length behave like "0" (insert) for array input,
        // while the string branch below treats a missing length as "until the end of the string"
        $length = array_fill(0, $num, 0);
      } elseif (is_array($length)) {
        $length = array_slice($length, 0, $num);
        foreach ($length as $lengthKey => $lengthValue) {
          if ($lengthValue === null) {
            $length[$lengthKey] = 0;
          } else {
            $length[$lengthKey] = is_int($lengthValue) ? $lengthValue : $num;
          }
        }
      } else {
        $length = array_pad(array($length), $num, $length);
      }

      // process every string on its own
      return array_map(array('\\voku\\helper\\UTF8', 'substr_replace'), $str, $replacement, $offset, $length);
    }

    // a single input string can only take a single replacement
    if (is_array($replacement)) {
      $replacement = count($replacement) > 0 ? $replacement[0] : '';
    }

    $str = (string)$str;
    $replacement = (string)$replacement;

    if ($str === '') {
      return $replacement;
    }

    // for a pure ASCII subject the native function is used directly
    if (self::is_ascii($str)) {
      if ($length === null) {
        return substr_replace($str, $replacement, $offset);
      }

      return substr_replace($str, $replacement, $offset, $length);
    }

    // split both strings with the "u" modifier, so that offsets and lengths count characters, not bytes
    preg_match_all('/./us', $str, $strMatches);
    preg_match_all('/./us', $replacement, $replacementMatches);

    if ($length === null) {
      $length = (int)self::strlen($str);
    }

    $strChars = $strMatches[0];
    array_splice($strChars, $offset, $length, $replacementMatches[0]);

    return implode('', $strChars);
  }
6831 1
6832
  /**
   * Removes a suffix ($needle) from the end of the string ($haystack).
   *
   * @param string $haystack <p>The string to search in.</p>
   * @param string $needle   <p>The suffix to remove.</p>
   *
   * @return string <p>The string without the suffix; the unchanged string if it does not end with $needle
   *                or if $needle is empty; an empty string if $haystack is empty.</p>
   */
  public static function substr_right($haystack, $needle)
  {
    $haystack = (string)$haystack;
    $needle = (string)$needle;

    // nothing can be stripped from an empty string
    if ($haystack === '') {
      return '';
    }

    // an empty needle, or a needle that is not a suffix, leaves the input untouched
    if ($needle === '' || self::str_ends_with($haystack, $needle) !== true) {
      return $haystack;
    }

    // keep everything in front of the matched suffix
    $keepLength = self::strlen($haystack) - self::strlen($needle);
    $head = self::substr($haystack, 0, $keepLength);

    return $head === false ? '' : (string)$head;
  }
6863
6864
  /**
   * Returns a case swapped version of the string.
   *
   * Every non-whitespace character is upper-cased; if that does not change the character,
   * it is lower-cased instead.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Default is UTF-8</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through "UTF8::clean()" first.</p>
   *
   * @return string <p>Each character's case swapped.</p>
   */
  public static function swapCase($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    if ($encoding !== 'UTF-8') {
      $encoding = self::normalize_encoding($encoding, 'UTF-8');
    }

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    return preg_replace_callback(
        '/[\S]/u',
        function ($match) use ($encoding) {
          $char = $match[0];
          $upper = self::strtoupper($char, $encoding);

          // unchanged by upper-casing => try the lower-case form instead
          return $char === $upper ? self::strtolower($char, $encoding) : $upper;
        },
        $str
    );
  }
6907
6908
  /**
   * Alias of "UTF8::to_ascii()": all arguments are forwarded unchanged.
   *
   * @see UTF8::to_ascii()
   *
   * @param string $s         <p>The input string.</p>
   * @param string $subst_chr <p>Forwarded as the "$unknown" argument.</p>
   * @param bool   $strict    <p>Forwarded as the "$strict" argument.</p>
   *
   * @return string
   *
   * @deprecated <p>use "UTF8::to_ascii()"</p>
   */
  public static function toAscii($s, $subst_chr = '?', $strict = false)
  {
    $ascii = self::to_ascii($s, $subst_chr, $strict);

    return $ascii;
  }
6925
6926
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_iso8859()"</p>
   */
  public static function toIso8859($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
6941
6942 18
  /**
   * Alias of "UTF8::to_latin1()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_latin1()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_latin1()"</p>
   */
  public static function toLatin1($str)
  {
    $converted = self::to_latin1($str);

    return $converted;
  }
6957
6958
  /**
   * Alias of "UTF8::to_utf8()": the argument is forwarded unchanged
   * (html-entity decoding stays at its default, i.e. disabled).
   *
   * @see UTF8::to_utf8()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   *
   * @deprecated <p>use "UTF8::to_utf8()"</p>
   */
  public static function toUTF8($str)
  {
    $converted = self::to_utf8($str);

    return $converted;
  }
6973
6974
  /**
   * Convert a string into ASCII.
   *
   * Steps: pure ASCII input is returned as-is; otherwise the string is passed through "UTF8::clean()",
   * optionally transliterated via PHP-Intl ($strict), and finally every remaining non-ASCII character
   * is looked up in the transliteration tables loaded through "UTF8::getData()".
   *
   * @param string $str     <p>The input string.</p>
   * @param string $unknown [optional] <p>Replacement for characters without a known ASCII form. (default is ?)</p>
   * @param bool   $strict  [optional] <p>Use "transliterator_transliterate()" from PHP-Intl | WARNING: bad
   *                        performance</p>
   *
   * @return string
   */
  public static function to_ascii($str, $unknown = '?', $strict = false)
  {
    // cache of the loaded transliteration tables, indexed by "code point >> 8"
    static $UTF8_TO_ASCII;

    $str = (string)$str;

    if (!isset($str[0])) {
      return '';
    }

    // check if we only have ASCII, first (better performance)
    if (self::is_ascii($str) === true) {
      return $str;
    }

    $str = self::clean($str, true, true, true);

    // check again, if we only have ASCII, now ...
    if (self::is_ascii($str) === true) {
      return $str;
    }

    if ($strict === true) {
      if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
        self::checkForSupport();
      }

      if (
          self::$SUPPORT['intl'] === true
          &&
          Bootup::is_php('5.4') === true
      ) {

        // HACK for issue from "transliterator_transliterate()"
        $str = str_replace(
            'ℌ',
            'H',
            $str
        );

        $transliterated = transliterator_transliterate(
            'NFD; [:Nonspacing Mark:] Remove; NFC; Any-Latin; Latin-ASCII;',
            $str
        );

        // "transliterator_transliterate()" returns false on failure: in that case keep the
        // current string and fall through to the table based conversion below
        if ($transliterated !== false) {
          $str = $transliterated;

          // check again, if we only have ASCII, now ...
          if (self::is_ascii($str) === true) {
            return $str;
          }
        }
      }
    }

    preg_match_all('/.{1}|[^\x00]{1,1}$/us', $str, $ar);
    $chars = $ar[0];
    foreach ($chars as &$c) {

      $ordC0 = ord($c[0]);

      // ASCII - next please
      if ($ordC0 <= 127) {
        continue;
      }

      // reset for every character, so that a code point decoded for a previous
      // character can never be reused for the current one
      $ord = null;
      $tailLength = 0;

      // the lead byte defines its own payload bits and the number of continuation bytes
      if ($ordC0 < 192) {
        // 128-191: a continuation byte can not start a character => unknown
        $tailLength = 0;
      } elseif ($ordC0 <= 223) {
        $ord = $ordC0 - 192;
        $tailLength = 1;
      } elseif ($ordC0 <= 239) {
        $ord = $ordC0 - 224;
        $tailLength = 2;
      } elseif ($ordC0 <= 247) {
        $ord = $ordC0 - 240;
        $tailLength = 3;
      } elseif ($ordC0 <= 251) {
        $ord = $ordC0 - 248;
        $tailLength = 4;
      } elseif ($ordC0 <= 253) {
        $ord = $ordC0 - 252;
        $tailLength = 5;
      }
      // 254 and 255 leave $ord at null => unknown

      if ($ord === null) {
        $c = $unknown;
        continue;
      }

      // every continuation byte contributes its lower 6 bits
      for ($k = 1; $k <= $tailLength; $k++) {
        $ord = $ord * 64 + (ord($c[$k]) - 128);
      }

      // the tables are split into "banks" of 256 code points, loaded on first use
      $bank = $ord >> 8;
      if (!isset($UTF8_TO_ASCII[$bank])) {
        $UTF8_TO_ASCII[$bank] = self::getData(sprintf('x%02x', $bank));
        if ($UTF8_TO_ASCII[$bank] === false) {
          $UTF8_TO_ASCII[$bank] = array();
        }
      }

      $newchar = $ord & 255;

      if (isset($UTF8_TO_ASCII[$bank], $UTF8_TO_ASCII[$bank][$newchar])) {
        $c = $UTF8_TO_ASCII[$bank][$newchar];
      } else {
        $c = $unknown;
      }
    }
    // break the reference of the by-reference loop variable
    unset($c);

    return implode('', $chars);
  }
7134
7135
  /**
   * Convert a string into "ISO-8859"-encoding (Latin-1).
   *
   * Arrays are converted element by element (recursively), keeping their keys.
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_iso8859($str)
  {
    if (is_array($str)) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_iso8859($value);
      }

      return $converted;
    }

    $str = (string)$str;

    return $str === '' ? '' : self::utf8_decode($str);
  }
7164
7165
  /**
   * Alias of "UTF8::to_iso8859()": the argument is forwarded unchanged.
   *
   * @see UTF8::to_iso8859()
   *
   * @param string|string[] $str
   *
   * @return string|string[]
   */
  public static function to_latin1($str)
  {
    $converted = self::to_iso8859($str);

    return $converted;
  }
7178
7179 20
  /**
   * This function leaves UTF-8 characters alone, while converting almost all non-UTF8 to UTF8.
   *
   * <ul>
   * <li>Byte sequences that look like 2-, 3- or 4-byte UTF-8 characters are copied unchanged.</li>
   * <li>Every other byte >= 0x80 is converted via "UTF8::to_utf8_convert()".</li>
   * <li>Unicode escape sequences ("\uXXXX", four hex digits) are decoded afterwards.</li>
   * <li>Html-entities are only decoded if $decodeHtmlEntityToUtf8 is true.</li>
   * <li>WARNING: It does not remove invalid UTF-8 characters, so you maybe need to use "UTF8::clean()" for this
   * case.</li>
   * </ul>
   *
   * @param string|string[] $str                    <p>Any string or array (arrays are converted element by
   *                                                element, recursively).</p>
   * @param bool            $decodeHtmlEntityToUtf8 <p>Set to true, if you need to decode html-entities.</p>
   *
   * @return string|string[] <p>The UTF-8 encoded string.</p>
   */
  public static function to_utf8($str, $decodeHtmlEntityToUtf8 = false)
  {
    if (is_array($str)) {
      $converted = array();

      /** @noinspection ForeachSourceInspection */
      foreach ($str as $key => $value) {
        $converted[$key] = self::to_utf8($value, $decodeHtmlEntityToUtf8);
      }

      return $converted;
    }

    $str = (string)$str;

    if ($str === '') {
      return $str;
    }

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // we need the number of bytes here, not the number of characters
    $byteCount = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    $out = '';

    /** @noinspection ForeachInvariantsInspection */
    for ($pos = 0; $pos < $byteCount; $pos++) {
      $lead = $str[$pos];

      if ($lead < "\xC0") {
        // 0x80-0xBF: a continuation byte without a lead byte needs conversion,
        // everything below 0x80 is copied as it is
        $out .= (($lead & "\xC0") === "\x80") ? self::to_utf8_convert($lead) : $lead;
        continue;
      }

      // how many continuation bytes would this lead byte require?
      if ($lead <= "\xDF") {
        $tailLength = 1; // looks like 2 bytes UTF8
      } elseif ($lead <= "\xEF") {
        $tailLength = 2; // looks like 3 bytes UTF8
      } elseif ($lead <= "\xF7") {
        $tailLength = 3; // looks like 4 bytes UTF8
      } else {
        $tailLength = 0; // doesn't look like UTF8, but should be converted
      }

      // collect the continuation bytes; bytes behind the end of the string count as "\x00"
      $tail = '';
      $looksLikeUtf8 = $tailLength > 0;
      for ($k = 1; $looksLikeUtf8 && $k <= $tailLength; $k++) {
        $next = $pos + $k >= $byteCount ? "\x00" : $str[$pos + $k];

        if ($next < "\x80" || $next > "\xBF") {
          $looksLikeUtf8 = false;
        } else {
          $tail .= $next;
        }
      }

      if ($looksLikeUtf8) {
        // yeah, almost sure it's UTF8 already
        $out .= $lead . $tail;
        $pos += $tailLength;
      } else {
        // not valid UTF8 - convert the lead byte only, the following bytes are inspected on their own
        $out .= self::to_utf8_convert($lead);
      }
    }

    // decode unicode escape sequences
    $out = preg_replace_callback(
        '/\\\\u([0-9a-f]{4})/i',
        function ($match) {
          return \mb_convert_encoding(pack('H*', $match[1]), 'UTF-8', 'UCS-2BE');
        },
        $out
    );

    // decode html-entities, if requested
    if ($decodeHtmlEntityToUtf8 === true) {
      $out = self::html_entity_decode($out);
    }

    return $out;
  }
7296
7297
  /**
   * Convert one non-UTF-8 byte into a UTF-8 byte sequence.
   *
   * The byte value is first looked up in the Windows-1252 special-case table; if it is not listed there,
   * a 2-byte sequence is built from the byte itself.
   *
   * @param string $int <p>Despite its name this is handled as a one-byte string: it is passed to "ord()" and
   *                    combined with string bit-masks.</p>
   *
   * @return string
   */
  private static function to_utf8_convert($int)
  {
    $byteValue = ord($int);

    // found in Windows-1252 special cases
    if (isset(self::$WIN1252_TO_UTF8[$byteValue])) {
      return '' . self::$WIN1252_TO_UTF8[$byteValue];
    }

    // lead byte: "110xxxxx" with the upper bits of the value, trail byte: "10xxxxxx" with the lower 6 bits
    $leadByte = self::chr_and_parse_int($byteValue / 64) | "\xC0";
    $trailByte = ($int & "\x3F") | "\x80";

    return '' . $leadByte . $trailByte;
  }
7317 14
7318
  /**
   * Strip whitespace or other characters from beginning or end of a UTF-8 string.
   *
   * Without $chars, all leading and trailing characters of the unicode categories "Z" (separators)
   * and "C" (control / other) are removed. With $chars, the work is delegated to "UTF8::ltrim()" and
   * "UTF8::rtrim()".
   *
   * INFO: the native "trim()" is not used here, because it would only be safe for pure ASCII input
   * and checking for that was considered more expensive than the gain.
   *
   * @param string $str   <p>The string to be trimmed</p>
   * @param string $chars [optional] <p>Optional characters to be stripped. Any falsy value (including the
   *                      string "0") selects the default behaviour.</p>
   *
   * @return string <p>The trimmed string.</p>
   */
  public static function trim($str = '', $chars = INF)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $useDefaultChars = ($chars === INF || !$chars);

    if ($useDefaultChars === false) {
      return self::rtrim(self::ltrim($str, $chars), $chars);
    }

    // Info: http://nadeausoftware.com/articles/2007/9/php_tip_how_strip_punctuation_characters_web_page#Unicodecharactercategories
    return preg_replace('/^[\pZ\pC]+|[\pZ\pC]+$/u', '', $str);
  }
7346
7347
  /**
   * Makes string's first char uppercase.
   *
   * @param string  $str       <p>The input string.</p>
   * @param string  $encoding  [optional] <p>Set the charset.</p>
   * @param boolean $cleanUtf8 [optional] <p>Pass the string through "UTF8::clean()" first.</p>
   *
   * @return string <p>The resulting string</p>
   */
  public static function ucfirst($str, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // everything behind the first character stays untouched
    $rest = self::substr($str, 1, null, $encoding);
    if ($rest === false) {
      $rest = '';
    }

    // only the first character is upper-cased
    $firstChar = (string)self::substr($str, 0, 1, $encoding);
    $firstCharUpper = self::strtoupper($firstChar, $encoding, $cleanUtf8);

    return $firstCharUpper . $rest;
  }
7377 7
7378
  /**
   * Alias of "UTF8::ucfirst()": all arguments are forwarded unchanged.
   *
   * @see UTF8::ucfirst()
   *
   * @param string  $word
   * @param string  $encoding
   * @param boolean $cleanUtf8
   *
   * @return string
   */
  public static function ucword($word, $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $upperCased = self::ucfirst($word, $encoding, $cleanUtf8);

    return $upperCased;
  }
7393 7
7394
  /**
   * Uppercase for all words in the string.
   *
   * Without exceptions / charlist and for pure ASCII input the native "ucwords()" is used. Otherwise the
   * string is split via "UTF8::str_to_words()" and "UTF8::ucfirst()" is applied to every part that is not
   * listed in $exceptions.
   *
   * @param string   $str        <p>The input string.</p>
   * @param string[] $exceptions [optional] <p>Exclusion for some words (compared strictly, case-sensitive).</p>
   * @param string   $charlist   [optional] <p>Additional chars that contains to words and do not start a new word.</p>
   * @param string   $encoding   [optional] <p>Set the charset.</p>
   * @param boolean  $cleanUtf8  [optional] <p>Pass the string through "UTF8::clean()" first.</p>
   *
   * @return string
   */
  public static function ucwords($str, $exceptions = array(), $charlist = '', $encoding = 'UTF-8', $cleanUtf8 = false)
  {
    $str = (string)$str;

    // compare against the empty string explicitly: a plain truthiness check would also reject "0"
    if ($str === '') {
      return '';
    }

    // INFO: mb_convert_case($str, MB_CASE_TITLE);
    // -> MB_CASE_TITLE didn't only uppercase the first letter, it also lowercase all other letters

    if ($cleanUtf8 === true) {
      $str = self::clean($str);
    }

    // the native function is only an option if neither a charlist nor any non-empty exception was given
    // (again an explicit comparison, so that a charlist / exception of "0" is not ignored)
    $usePhpDefaultFunctions = ($charlist . implode('', $exceptions)) === '';

    if (
        $usePhpDefaultFunctions === true
        &&
        self::is_ascii($str) === true
    ) {
      return ucwords($str);
    }

    $words = self::str_to_words($str, $charlist);
    $useExceptions = count($exceptions) > 0;
    $newWords = array();

    foreach ($words as $word) {

      // skip empty parts only - a part like "0" must stay in the result
      if ((string)$word === '') {
        continue;
      }

      if (
          $useExceptions === false
          ||
          !in_array($word, $exceptions, true)
      ) {
        $word = self::ucfirst($word, $encoding);
      }

      $newWords[] = $word;
    }

    return implode('', $newWords);
  }
7462
7463
  /**
   * Multi decode html entity & fix urlencoded-win1252-chars.
   *
   * e.g:
   * 'test+test'                     => 'test test'
   * 'D&#252;sseldorf'               => 'Düsseldorf'
   * 'D%FCsseldorf'                  => 'Düsseldorf'
   * 'D&#xFC;sseldorf'               => 'Düsseldorf'
   * 'D%26%23xFC%3Bsseldorf'         => 'Düsseldorf'
   * 'Düsseldorf'                    => 'Düsseldorf'
   * 'D%C3%BCsseldorf'               => 'Düsseldorf'
   * 'D%C3%83%C2%BCsseldorf'         => 'Düsseldorf'
   * 'D%25C3%2583%25C2%25BCsseldorf' => 'Düsseldorf'
   *
   * @param string $str          <p>The input string.</p>
   * @param bool   $multi_decode <p>Decode as often as possible, i.e. until a decoding pass no longer changes
   *                             the string.</p>
   *
   * @return string
   */
  public static function urldecode($str, $multi_decode = true)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    // rewrite "%uXXXX" sequences into numeric html-entities, they are decoded in the loop below
    $unicodeEscapePattern = '/%u([0-9a-f]{3,4})/i';
    if (preg_match($unicodeEscapePattern, $str)) {
      $str = preg_replace($unicodeEscapePattern, '&#x\\1;', urldecode($str));
    }

    $flags = Bootup::is_php('5.4') === true ? ENT_QUOTES | ENT_HTML5 : ENT_QUOTES;

    while (true) {
      $previous = $str;

      // one decoding pass: to UTF-8 -> html-entities -> url-encoding -> simple UTF-8 fixes
      $utf8 = self::to_utf8($str);
      $entityDecoded = self::html_entity_decode($utf8, $flags);
      $str = self::fix_simple_utf8(urldecode($entityDecoded));

      // stop after the first pass, or as soon as a pass did not change anything
      if ($multi_decode !== true || $previous === $str) {
        break;
      }
    }

    return (string)$str;
  }
7513
7514
  /**
   * Returns a lookup table that maps url-encoded Windows-1252 bytes
   * ("%20" .. "%FF") to the corresponding UTF-8 character.
   *
   * Bytes that are unassigned in Windows-1252 (0x81, 0x8D, 0x8F, 0x90, 0x9D)
   * map to an empty string. Invisible characters (DEL, NO-BREAK SPACE,
   * SOFT HYPHEN) are written as explicit byte escapes so that they cannot be
   * lost by editors or copy & paste.
   *
   * @deprecated <p>use the "UTF8::urldecode()" function to decode a string</p>
   *
   * @return array <p>Keys are the upper-case "%XX" sequences, values are UTF-8 strings.</p>
   */
  public static function urldecode_fix_win1252_chars()
  {
    return array(
        '%20' => ' ',
        '%21' => '!',
        '%22' => '"',
        '%23' => '#',
        '%24' => '$',
        '%25' => '%',
        '%26' => '&',
        '%27' => "'",
        '%28' => '(',
        '%29' => ')',
        '%2A' => '*',
        '%2B' => '+',
        '%2C' => ',',
        '%2D' => '-',
        '%2E' => '.',
        '%2F' => '/',
        '%30' => '0',
        '%31' => '1',
        '%32' => '2',
        '%33' => '3',
        '%34' => '4',
        '%35' => '5',
        '%36' => '6',
        '%37' => '7',
        '%38' => '8',
        '%39' => '9',
        '%3A' => ':',
        '%3B' => ';',
        '%3C' => '<',
        '%3D' => '=',
        '%3E' => '>',
        '%3F' => '?',
        '%40' => '@',
        '%41' => 'A',
        '%42' => 'B',
        '%43' => 'C',
        '%44' => 'D',
        '%45' => 'E',
        '%46' => 'F',
        '%47' => 'G',
        '%48' => 'H',
        '%49' => 'I',
        '%4A' => 'J',
        '%4B' => 'K',
        '%4C' => 'L',
        '%4D' => 'M',
        '%4E' => 'N',
        '%4F' => 'O',
        '%50' => 'P',
        '%51' => 'Q',
        '%52' => 'R',
        '%53' => 'S',
        '%54' => 'T',
        '%55' => 'U',
        '%56' => 'V',
        '%57' => 'W',
        '%58' => 'X',
        '%59' => 'Y',
        '%5A' => 'Z',
        '%5B' => '[',
        '%5C' => '\\',
        '%5D' => ']',
        '%5E' => '^',
        '%5F' => '_',
        '%60' => '`',
        '%61' => 'a',
        '%62' => 'b',
        '%63' => 'c',
        '%64' => 'd',
        '%65' => 'e',
        '%66' => 'f',
        '%67' => 'g',
        '%68' => 'h',
        '%69' => 'i',
        '%6A' => 'j',
        '%6B' => 'k',
        '%6C' => 'l',
        '%6D' => 'm',
        '%6E' => 'n',
        '%6F' => 'o',
        '%70' => 'p',
        '%71' => 'q',
        '%72' => 'r',
        '%73' => 's',
        '%74' => 't',
        '%75' => 'u',
        '%76' => 'v',
        '%77' => 'w',
        '%78' => 'x',
        '%79' => 'y',
        '%7A' => 'z',
        '%7B' => '{',
        '%7C' => '|',
        '%7D' => '}',
        '%7E' => '~',
        '%7F' => "\x7F", // DELETE (control character)
        '%80' => '€', // EURO SIGN (0x80 in Windows-1252)
        '%81' => '', // unassigned in Windows-1252
        '%82' => '‚',
        '%83' => 'ƒ',
        '%84' => '„',
        '%85' => '…',
        '%86' => '†',
        '%87' => '‡',
        '%88' => 'ˆ',
        '%89' => '‰',
        '%8A' => 'Š',
        '%8B' => '‹',
        '%8C' => 'Œ',
        '%8D' => '', // unassigned in Windows-1252
        '%8E' => 'Ž',
        '%8F' => '', // unassigned in Windows-1252
        '%90' => '', // unassigned in Windows-1252
        '%91' => '‘',
        '%92' => '’',
        '%93' => '“',
        '%94' => '”',
        '%95' => '•',
        '%96' => '–',
        '%97' => '—',
        '%98' => '˜',
        '%99' => '™',
        '%9A' => 'š',
        '%9B' => '›',
        '%9C' => 'œ',
        '%9D' => '', // unassigned in Windows-1252
        '%9E' => 'ž',
        '%9F' => 'Ÿ',
        '%A0' => "\xC2\xA0", // NO-BREAK SPACE
        '%A1' => '¡',
        '%A2' => '¢',
        '%A3' => '£',
        '%A4' => '¤',
        '%A5' => '¥',
        '%A6' => '¦',
        '%A7' => '§',
        '%A8' => '¨',
        '%A9' => '©',
        '%AA' => 'ª',
        '%AB' => '«',
        '%AC' => '¬',
        '%AD' => "\xC2\xAD", // SOFT HYPHEN
        '%AE' => '®',
        '%AF' => '¯',
        '%B0' => '°',
        '%B1' => '±',
        '%B2' => '²',
        '%B3' => '³',
        '%B4' => '´',
        '%B5' => 'µ',
        '%B6' => '¶',
        '%B7' => '·',
        '%B8' => '¸',
        '%B9' => '¹',
        '%BA' => 'º',
        '%BB' => '»',
        '%BC' => '¼',
        '%BD' => '½',
        '%BE' => '¾',
        '%BF' => '¿',
        '%C0' => 'À',
        '%C1' => 'Á',
        '%C2' => 'Â',
        '%C3' => 'Ã',
        '%C4' => 'Ä',
        '%C5' => 'Å',
        '%C6' => 'Æ',
        '%C7' => 'Ç',
        '%C8' => 'È',
        '%C9' => 'É',
        '%CA' => 'Ê',
        '%CB' => 'Ë',
        '%CC' => 'Ì',
        '%CD' => 'Í',
        '%CE' => 'Î',
        '%CF' => 'Ï',
        '%D0' => 'Ð',
        '%D1' => 'Ñ',
        '%D2' => 'Ò',
        '%D3' => 'Ó',
        '%D4' => 'Ô',
        '%D5' => 'Õ',
        '%D6' => 'Ö',
        '%D7' => '×',
        '%D8' => 'Ø',
        '%D9' => 'Ù',
        '%DA' => 'Ú',
        '%DB' => 'Û',
        '%DC' => 'Ü',
        '%DD' => 'Ý',
        '%DE' => 'Þ',
        '%DF' => 'ß',
        '%E0' => 'à',
        '%E1' => 'á',
        '%E2' => 'â',
        '%E3' => 'ã',
        '%E4' => 'ä',
        '%E5' => 'å',
        '%E6' => 'æ',
        '%E7' => 'ç',
        '%E8' => 'è',
        '%E9' => 'é',
        '%EA' => 'ê',
        '%EB' => 'ë',
        '%EC' => 'ì',
        '%ED' => 'í',
        '%EE' => 'î',
        '%EF' => 'ï',
        '%F0' => 'ð',
        '%F1' => 'ñ',
        '%F2' => 'ò',
        '%F3' => 'ó',
        '%F4' => 'ô',
        '%F5' => 'õ',
        '%F6' => 'ö',
        '%F7' => '÷',
        '%F8' => 'ø',
        '%F9' => 'ù',
        '%FA' => 'ú',
        '%FB' => 'û',
        '%FC' => 'ü',
        '%FD' => 'ý',
        '%FE' => 'þ',
        '%FF' => 'ÿ',
    );
  }
7750 4
7751 4
  /**
   * Converts an UTF-8 string into its single-byte ISO-8859-1 form.
   *
   * The input is first normalised via "UTF8::to_utf8()", then every sequence
   * listed in the internal UTF-8 -> Windows-1252 table is replaced. Afterwards
   * the string is rewritten in place, byte by byte: 2-byte sequences whose
   * code point is below 256 become one byte, everything else that is
   * multi-byte (larger 2-byte code points, 3- and 4-byte sequences) becomes "?".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The decoded string, or an empty string for empty input.</p>
   */
  public static function utf8_decode($str)
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $str = (string)self::to_utf8($str);

    // the search / replace lists are built once and reused on later calls
    static $UTF8_TO_WIN1252_KEYS_CACHE = null;
    static $UTF8_TO_WIN1252_VALUES_CACHE = null;

    if (null === $UTF8_TO_WIN1252_KEYS_CACHE) {
      $UTF8_TO_WIN1252_KEYS_CACHE = array_keys(self::$UTF8_TO_WIN1252);
      $UTF8_TO_WIN1252_VALUES_CACHE = array_values(self::$UTF8_TO_WIN1252);
    }

    /** @noinspection PhpInternalEntityUsedInspection */
    $str = str_replace($UTF8_TO_WIN1252_KEYS_CACHE, $UTF8_TO_WIN1252_VALUES_CACHE, $str);

    if (!isset(self::$SUPPORT['already_checked_via_portable_utf8'])) {
      self::checkForSupport();
    }

    // byte length, even when "mbstring.func_overload" replaces strlen()
    $byteLength = self::$SUPPORT['mbstring_func_overload'] === true
        ? \mb_strlen($str, '8BIT')
        : strlen($str);

    // $readPos walks the UTF-8 bytes, $writePos the (never longer) output
    $readPos = 0;
    $writePos = 0;

    while ($readPos < $byteLength) {
      $leadNibble = $str[$readPos] & "\xF0";

      if ($leadNibble === "\xC0" || $leadNibble === "\xD0") {
        // 2-byte sequence: 5 payload bits from the lead byte, 6 from the trail byte
        $codePoint = (ord($str[$readPos] & "\x1F") << 6) | ord($str[$readPos + 1] & "\x3F");
        ++$readPos;
        $str[$writePos] = $codePoint < 256 ? self::chr_and_parse_int($codePoint) : '?';
      } elseif ($leadNibble === "\xF0") {
        // 4-byte sequence: not representable, skip the three trail bytes
        $str[$writePos] = '?';
        $readPos += 3;
      } elseif ($leadNibble === "\xE0") {
        // 3-byte sequence: not representable, skip the two trail bytes
        $str[$writePos] = '?';
        $readPos += 2;
      } else {
        // single byte: copy as is
        $str[$writePos] = $str[$readPos];
      }

      ++$readPos;
      ++$writePos;
    }

    // drop the leftover bytes behind the last written position
    return (string)self::substr($str, 0, $writePos, '8BIT');
  }
7814
7815
  /**
   * Converts an ISO-8859-1 string to UTF-8.
   *
   * After the native conversion, sequences found in the internal
   * CP1252 -> UTF-8 table are replaced by their table values.
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>The UTF-8 string, or an empty string for empty input.</p>
   */
  public static function utf8_encode($str)
  {
    static $CP1252_TO_UTF8_KEYS_CACHE = null;
    static $CP1252_TO_UTF8_VALUES_CACHE = null;

    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $encoded = \utf8_encode($str);
    if (false === $encoded) {
      return '';
    }

    $encoded = (string)$encoded;

    // without a "\xC2" byte the table below is not applied at all
    if (strpos($encoded, "\xC2") === false) {
      return $encoded;
    }

    // build the search / replace lists only once
    if (null === $CP1252_TO_UTF8_KEYS_CACHE) {
      $CP1252_TO_UTF8_KEYS_CACHE = array_keys(self::$CP1252_TO_UTF8);
      $CP1252_TO_UTF8_VALUES_CACHE = array_values(self::$CP1252_TO_UTF8);
    }

    return str_replace($CP1252_TO_UTF8_KEYS_CACHE, $CP1252_TO_UTF8_VALUES_CACHE, $encoded);
  }
7851
7852
  /**
   * Legacy alias that forwards to "UTF8::fix_simple_utf8()".
   *
   * @param string $str <p>The input string.</p>
   *
   * @return string <p>Whatever "UTF8::fix_simple_utf8()" returns for the input.</p>
   *
   * @deprecated <p>use "UTF8::fix_simple_utf8()"</p>
   */
  public static function utf8_fix_win1252_chars($str)
  {
    $fixed = self::fix_simple_utf8($str);

    return $fixed;
  }
7865
7866
  /**
   * Provides the table of UTF-8 whitespace characters used by this class.
   *
   * @see    http://www.bogofilter.org/pipermail/bogofilter/2003-March/001889.html
   *
   * @author Derek E.
   *
   * @return array <p>
   *               The whitespace characters as values, keyed by the type of whitespace
   *               as defined in the URL above.
   *               </p>
   */
  public static function whitespace_table()
  {
    $table = self::$WHITESPACE_TABLE;

    return $table;
  }
7882 10
7883
  /**
   * Truncates a string after a given number of words.
   *
   * @param string $str      <p>The input string.</p>
   * @param int    $limit    <p>The maximum number of words to keep.</p>
   * @param string $strAddOn <p>Text appended when the string had to be shortened.</p>
   *
   * @return string <p>
   *                An empty string for empty input or a limit below 1, the unchanged input
   *                when nothing was cut off, otherwise the shortened text plus $strAddOn.
   *                </p>
   */
  public static function words_limit($str, $limit = 100, $strAddOn = '...')
  {
    $str = (string)$str;

    if ($str === '') {
      return '';
    }

    $limit = (int)$limit;

    if ($limit < 1) {
      return '';
    }

    // leading whitespace followed by at most $limit "word + trailing whitespace" groups
    $pattern = '/^\s*+(?:\S++\s*+){1,' . $limit . '}/u';
    preg_match($pattern, $str, $matches);

    $wasShortened = isset($matches[0]) && self::strlen($str) !== self::strlen($matches[0]);

    if ($wasShortened) {
      return self::rtrim($matches[0]) . $strAddOn;
    }

    return $str;
  }
7919 6
7920 6
  /**
   * Wraps a string to a given number of characters, counting multi-byte
   * characters as one column.
   *
   * The text is translated into a single-byte "mask" (one byte per character),
   * the native wordwrap() is run on that mask and the resulting break positions
   * are then applied to the real characters.
   *
   * @link  http://php.net/manual/en/function.wordwrap.php
   *
   * @param string $str   <p>The input string.</p>
   * @param int    $width [optional] <p>The column width.</p>
   * @param string $break [optional] <p>The string that is inserted at every line break.</p>
   * @param bool   $cut   [optional] <p>
   *                      When true, the string is always wrapped at or before the given
   *                      width, so a word longer than the width is broken apart.
   *                      </p>
   *
   * @return string <p>The wrapped string; an empty string if $str or $break is empty.</p>
   */
  public static function wordwrap($str, $width = 75, $break = "\n", $cut = false)
  {
    $str = (string)$str;
    $break = (string)$break;

    if ($str === '' || $break === '') {
      return '';
    }

    // $mask mirrors $chars: "#" = existing break, " " = space, "?" = any other character
    $mask = '';
    $chars = array();

    $segments = explode($break, $str);
    $segmentCount = count($segments);

    for ($index = 0; $index < $segmentCount; ++$index) {
      if ($index > 0) {
        $chars[] = $break;
        $mask .= '#';
      }

      $segment = $segments[$index];
      unset($segments[$index]);

      foreach (self::split($segment) as $char) {
        $chars[] = $char;
        $mask .= ($char === ' ') ? ' ' : '?';
      }
    }

    $mask = wordwrap($mask, $width, '#', $cut);

    $wrapped = '';
    $charIndex = 0;  // next unread entry of $chars
    $maskIndex = -1; // last processed position in $mask
    $breakPos = -1;  // position of the current "#" in $mask

    while (true) {
      $breakPos = self::strpos($mask, '#', $breakPos + 1);
      if ($breakPos === false) {
        break;
      }

      // copy every character in front of the break marker
      for (++$maskIndex; $maskIndex < $breakPos; ++$maskIndex) {
        $wrapped .= $chars[$charIndex];
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      // a marker that stands for an original break / space consumes that character
      if ($chars[$charIndex] === $break || $chars[$charIndex] === ' ') {
        unset($chars[$charIndex]);
        ++$charIndex;
      }

      $wrapped .= $break;
    }

    // whatever is left belongs to the last line
    return $wrapped . implode('', $chars);
  }
7987
7988
  /**
   * Provides the list of Unicode White Space characters used by this class.
   *
   * @return array <p>The White Space characters as values, keyed by their numeric code point.</p>
   */
  public static function ws()
  {
    $whitespace = self::$WHITESPACE;

    return $whitespace;
  }
7997
7998
}
7999