1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* CakePHP(tm) : Rapid Development Framework (http://cakephp.org) |
4
|
|
|
* Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org) |
5
|
|
|
* |
6
|
|
|
* Licensed under The MIT License |
7
|
|
|
* For full copyright and license information, please see the LICENSE.txt |
8
|
|
|
* Redistributions of files must retain the above copyright notice. |
9
|
|
|
* |
10
|
|
|
* @copyright Copyright (c) Cake Software Foundation, Inc. (http://cakefoundation.org) |
11
|
|
|
* @link http://cakephp.org CakePHP(tm) Project |
12
|
|
|
* @since 1.2.0 |
13
|
|
|
* @since NML v0.4.6 |
14
|
|
|
* @license http://www.opensource.org/licenses/mit-license.php MIT License |
15
|
|
|
*/ |
16
|
|
|
namespace Cake\Utility; |
17
|
|
|
|
18
|
|
|
use InvalidArgumentException; |
19
|
|
|
|
20
|
|
|
/** |
21
|
|
|
* Text handling methods. |
22
|
|
|
* |
23
|
|
|
*/ |
24
|
|
|
class Text |
25
|
|
|
{ |
26
|
|
|
|
27
|
|
|
/** |
28
|
|
|
* Generate a random UUID version 4 |
29
|
|
|
* |
30
|
|
|
* Warning: This method should not be used as a random seed for any cryptographic operations. |
31
|
|
|
* Instead you should use the openssl or mcrypt extensions. |
32
|
|
|
* |
33
|
|
|
* @see http://www.ietf.org/rfc/rfc4122.txt |
34
|
|
|
* @return string RFC 4122 UUID |
35
|
|
|
* @copyright Matt Farina MIT License https://github.com/lootils/uuid/blob/master/LICENSE |
36
|
|
|
*/ |
37
|
|
|
public static function uuid()
{
    // Prefer the CSPRNG-backed random_int() where the runtime provides it
    // (PHP 7+); fall back to mt_rand() so older runtimes keep working.
    $random = function_exists('random_int') ? 'random_int' : 'mt_rand';

    return sprintf(
        '%04x%04x-%04x-%04x-%04x-%04x%04x%04x',
        // 32 bits for "time_low"
        $random(0, 65535),
        $random(0, 65535),
        // 16 bits for "time_mid"
        $random(0, 65535),
        // 12 random bits below the 0100 nibble that marks version 4
        // ("time_hi_and_version")
        $random(0, 4095) | 0x4000,
        // 16 bits: 8 for "clk_seq_hi_res", 8 for "clk_seq_low";
        // the two most significant bits are forced to 10 (variant DCE 1.1)
        $random(0, 0x3fff) | 0x8000,
        // 48 bits for "node"
        $random(0, 65535),
        $random(0, 65535),
        $random(0, 65535)
    );
}
58
|
|
|
|
59
|
|
|
/** |
60
|
|
|
* Tokenizes a string using $separator, ignoring any instance of $separator that appears between |
61
|
|
|
* $leftBound and $rightBound. |
62
|
|
|
* |
63
|
|
|
* @param string $data The data to tokenize. |
64
|
|
|
* @param string $separator The token to split the data on. |
65
|
|
|
* @param string $leftBound The left boundary to ignore separators in. |
66
|
|
|
* @param string $rightBound The right boundary to ignore separators in. |
67
|
|
|
* @return array Array of tokens in $data, or an empty array if $data is empty. |
68
|
|
|
*/ |
69
|
|
|
public static function tokenize($data, $separator = ',', $leftBound = '(', $rightBound = ')')
{
    // Nothing to split: always hand back an array.
    if (empty($data)) {
        return [];
    }

    $depth = 0;      // current nesting level inside the bounds
    $offset = 0;     // byte position where the next scan starts
    $buffer = '';    // token being assembled
    $results = [];
    $length = strlen($data);
    $open = false;   // only used when both bounds are the same character

    while ($offset <= $length) {
        // Locate the nearest separator or boundary at or after $offset.
        $tmpOffset = -1;
        $offsets = [
            strpos($data, $separator, $offset),
            strpos($data, $leftBound, $offset),
            strpos($data, $rightBound, $offset)
        ];
        foreach ($offsets as $candidate) {
            if ($candidate !== false && ($tmpOffset === -1 || $candidate < $tmpOffset)) {
                $tmpOffset = $candidate;
            }
        }

        // No more special characters: the rest of the input is the last token.
        if ($tmpOffset === -1) {
            $results[] = $buffer . substr($data, $offset);
            break;
        }

        $buffer .= substr($data, $offset, $tmpOffset - $offset);
        // Square-bracket offsets: the curly-brace form was removed in PHP 8.
        $char = $data[$tmpOffset];

        if (!$depth && $char === $separator) {
            // A separator outside any bounds closes the current token.
            $results[] = $buffer;
            $buffer = '';
        } else {
            $buffer .= $char;
        }

        if ($leftBound !== $rightBound) {
            if ($char === $leftBound) {
                $depth++;
            }
            if ($char === $rightBound) {
                $depth--;
            }
        } elseif ($char === $leftBound) {
            // Identical bounds (e.g. quotes) toggle between open and closed.
            // Resetting $open on close lets later quoted groups open again.
            if (!$open) {
                $depth++;
                $open = true;
            } else {
                $depth--;
                $open = false;
            }
        }

        $offset = $tmpOffset + 1;
    }

    if (empty($results) && !empty($buffer)) {
        $results[] = $buffer;
    }

    if (!empty($results)) {
        return array_map('trim', $results);
    }

    return [];
}
135
|
|
|
|
136
|
|
|
/** |
137
|
|
|
* Replaces variable placeholders inside a $str with any given $data. Each key in the $data array |
138
|
|
|
* corresponds to a variable placeholder name in $str. |
139
|
|
|
* Example: |
140
|
|
|
* ``` |
141
|
|
|
* Text::insert(':name is :age years old.', ['name' => 'Bob', 'age' => '65']); |
142
|
|
|
* ``` |
143
|
|
|
* Returns: Bob is 65 years old. |
144
|
|
|
* |
145
|
|
|
* Available $options are: |
146
|
|
|
* |
147
|
|
|
* - before: The character or string in front of the name of the variable placeholder (Defaults to `:`) |
148
|
|
|
* - after: The character or string after the name of the variable placeholder (Defaults to null) |
149
|
|
|
* - escape: The character or string used to escape the before character / string (Defaults to `\`) |
150
|
|
|
* - format: A regex to use for matching variable placeholders. Default is: `/(?<!\\)\:%s/` |
151
|
|
|
* (Overwrites before, after, breaks escape / clean) |
152
|
|
|
* - clean: A boolean or array with instructions for Text::cleanInsert |
153
|
|
|
* |
154
|
|
|
* @param string $str A string containing variable placeholders |
155
|
|
|
* @param array $data A key => val array where each key stands for a placeholder variable name |
156
|
|
|
* to be replaced with val |
157
|
|
|
* @param array $options An array of options, see description above |
158
|
|
|
* @return string |
159
|
|
|
*/ |
160
|
|
|
public static function insert($str, $data, array $options = [])
{
    // Caller-supplied options win; anything missing falls back to these.
    $options += [
        'before' => ':', 'after' => null, 'escape' => '\\', 'format' => null, 'clean' => false
    ];
    $pattern = $options['format'];
    $data = (array)$data;

    if (!empty($data)) {
        if ($pattern === null) {
            // Build the placeholder regex from before/after/escape; the
            // literal %%s survives as %s for the per-key sprintf() below.
            $pattern = sprintf(
                '/(?<!%s)%s%%s%s/',
                preg_quote($options['escape'], '/'),
                str_replace('%', '%%', preg_quote($options['before'], '/')),
                str_replace('%', '%%', preg_quote($options['after'], '/'))
            );
        }

        $positional = strpos($str, '?') !== false && is_numeric(key($data));
        if ($positional) {
            // Positional mode: each "?" consumes the next value in order.
            $cursor = 0;
            while (true) {
                $pos = strpos($str, '?', $cursor);
                if ($pos === false) {
                    break;
                }
                $value = array_shift($data);
                $cursor = $pos + strlen($value);
                $str = substr_replace($str, $value, $pos, 1);
            }
        } else {
            // Named mode, two passes: placeholders are first swapped for a
            // crc32 hash of their key, then the hashes for the real values.
            asort($data);

            $names = array_keys($data);
            $hashes = array_map('crc32', $names);
            $hashByName = array_combine($names, $hashes);
            krsort($hashByName);

            foreach ($hashByName as $name => $hash) {
                $str = preg_replace(sprintf($pattern, preg_quote($name, '/')), $hash, $str);
            }
            foreach (array_combine($hashes, array_values($data)) as $hash => $value) {
                if (is_array($value)) {
                    $value = '';
                }
                $str = str_replace($hash, $value, $str);
            }

            // With the built-in format, drop the escape in front of escaped markers.
            if (!isset($options['format']) && isset($options['before'])) {
                $str = str_replace($options['escape'] . $options['before'], $options['before'], $str);
            }
        }
    }

    return $options['clean'] ? static::cleanInsert($str, $options) : $str;
}
213
|
|
|
|
214
|
|
|
/** |
215
|
|
|
* Cleans up a Text::insert() formatted string with given $options depending on the 'clean' key in |
216
|
|
|
* $options. The default method used is text but html is also available. The goal of this function |
217
|
|
|
* is to replace all whitespace and unneeded markup around placeholders that did not get replaced |
218
|
|
|
* by Text::insert(). |
219
|
|
|
* |
220
|
|
|
* @param string $str String to clean. |
221
|
|
|
* @param array $options Options list. |
222
|
|
|
* @return string |
223
|
|
|
* @see \Cake\Utility\Text::insert() |
224
|
|
|
*/ |
225
|
|
|
public static function cleanInsert($str, array $options)
{
    $clean = $options['clean'];
    if (!$clean) {
        return $str;
    }

    // Normalise the shorthand forms (true, or a bare method name) to an array.
    if ($clean === true) {
        $clean = ['method' => 'text'];
    }
    if (!is_array($clean)) {
        $clean = ['method' => $options['clean']];
    }

    // Loose comparison on purpose: it mirrors the matching rules of switch.
    $method = $clean['method'];

    if ($method == 'html') {
        $clean += [
            'word' => '[\w,.]+',
            'andText' => true,
            'replacement' => '',
        ];
        // Matches a double-quoted attribute whose value is only placeholders.
        $attributePattern = sprintf(
            '/[\s]*[a-z]+=(")(%s%s%s[\s]*)+\\1/i',
            preg_quote($options['before'], '/'),
            $clean['word'],
            preg_quote($options['after'], '/')
        );
        $str = preg_replace($attributePattern, $clean['replacement'], $str);

        // Optionally run the text cleanup over what is left.
        if ($clean['andText']) {
            $options['clean'] = ['method' => 'text'];
            $str = static::cleanInsert($str, $options);
        }

        return $str;
    }

    if ($method == 'text') {
        $clean += [
            'word' => '[\w,.]+',
            'gap' => '[\s]*(?:(?:and|or)[\s]*)?',
            'replacement' => '',
        ];
        // Matches a placeholder followed by a gap, or a gap followed by a placeholder.
        $placeholderPattern = sprintf(
            '/(%s%s%s%s|%s%s%s%s)/',
            preg_quote($options['before'], '/'),
            $clean['word'],
            preg_quote($options['after'], '/'),
            $clean['gap'],
            $clean['gap'],
            preg_quote($options['before'], '/'),
            $clean['word'],
            preg_quote($options['after'], '/')
        );
        $str = preg_replace($placeholderPattern, $clean['replacement'], $str);
    }

    return $str;
}
279
|
|
|
|
280
|
|
|
/** |
281
|
|
|
* Wraps text to a specific width, can optionally wrap at word breaks. |
282
|
|
|
* |
283
|
|
|
* ### Options |
284
|
|
|
* |
285
|
|
|
* - `width` The width to wrap to. Defaults to 72. |
286
|
|
|
* - `wordWrap` Only wrap on words breaks (spaces) Defaults to true. |
287
|
|
|
* - `indent` String to indent with. Defaults to null. |
288
|
|
|
* - `indentAt` 0 based index to start indenting at. Defaults to 0. |
289
|
|
|
* |
290
|
|
|
* @param string $text The text to format. |
291
|
|
|
* @param array|int $options Array of options to use, or an integer to wrap the text to. |
292
|
|
|
* @return string Formatted text. |
293
|
|
|
*/ |
294
|
|
|
public static function wrap($text, $options = [])
{
    // A bare number is shorthand for the target width.
    if (is_numeric($options)) {
        $options = ['width' => $options];
    }
    $options += ['width' => 72, 'wordWrap' => true, 'indent' => null, 'indentAt' => 0];

    $wrapped = $options['wordWrap']
        ? self::wordWrap($text, $options['width'], "\n")
        : trim(chunk_split($text, $options['width'] - 1, "\n"));

    if (empty($options['indent'])) {
        return $wrapped;
    }

    // Prefix every line from index `indentAt` onwards with the indent string.
    $lines = explode("\n", $wrapped);
    $total = count($lines);
    for ($n = $options['indentAt']; $n < $total; $n++) {
        $lines[$n] = $options['indent'] . $lines[$n];
    }

    return implode("\n", $lines);
}
314
|
|
|
|
315
|
|
|
/** |
316
|
|
|
* Unicode and newline aware version of wordwrap. |
317
|
|
|
* |
318
|
|
|
* @param string $text The text to format. |
319
|
|
|
* @param int $width The width to wrap to. Defaults to 72. |
320
|
|
|
* @param string $break The line is broken using the optional break parameter. Defaults to '\n'. |
321
|
|
|
* @param bool $cut If the cut is set to true, the string is always wrapped at the specified width. |
322
|
|
|
* @return string Formatted text. |
323
|
|
|
*/ |
324
|
|
|
public static function wordWrap($text, $width = 72, $break = "\n", $cut = false)
{
    // Existing breaks delimit paragraphs; each one is wrapped on its own
    // and the pieces are joined back with the same break string.
    $wrapped = [];
    foreach (explode($break, $text) as $paragraph) {
        $wrapped[] = static::_wordWrap($paragraph, $width, $break, $cut);
    }

    return implode($break, $wrapped);
}
332
|
|
|
|
333
|
|
|
/** |
334
|
|
|
* Unicode aware version of wordwrap as helper method. |
335
|
|
|
* |
336
|
|
|
* @param string $text The text to format. |
337
|
|
|
* @param int $width The width to wrap to. Defaults to 72. |
338
|
|
|
* @param string $break The line is broken using the optional break parameter. Defaults to '\n'. |
339
|
|
|
* @param bool $cut If the cut is set to true, the string is always wrapped at the specified width. |
340
|
|
|
* @return string Formatted text. |
341
|
|
|
*/ |
342
|
|
|
protected static function _wordWrap($text, $width = 72, $break = "\n", $cut = false)
{
    $parts = [];

    if ($cut) {
        // Hard wrap: slice every $width characters regardless of words.
        // A non-positive width would consume nothing and loop forever.
        $width = max(1, (int)$width);
        while (mb_strlen($text) > 0) {
            $part = mb_substr($text, 0, $width);
            $parts[] = trim($part);
            $text = trim(mb_substr($text, mb_strlen($part)));
        }

        return implode($break, $parts);
    }

    // Each emitted line is trimmed anyway, so trim once up front. Without
    // this, leading whitespace shifts the "consumed" offset and characters
    // of the first line are repeated at the start of the next one.
    $text = trim($text);

    while (mb_strlen($text) > 0) {
        // Whatever is left fits on one line.
        if ($width >= mb_strlen($text)) {
            $parts[] = trim($text);
            break;
        }

        $part = mb_substr($text, 0, $width);
        $nextChar = mb_substr($text, $width, 1);
        if ($nextChar !== ' ') {
            // The cut would land mid-word: back up to the last space in the
            // slice, or run forward to the next space if the slice has none.
            $breakAt = mb_strrpos($part, ' ');
            if ($breakAt === false) {
                $breakAt = mb_strpos($text, ' ', $width);
            }
            if ($breakAt === false) {
                // No space anywhere: the remainder is one unbreakable word.
                $parts[] = trim($text);
                break;
            }
            $part = mb_substr($text, 0, $breakAt);
        }

        // Advance by the untrimmed length so nothing is re-read.
        $consumed = mb_strlen($part);
        $parts[] = trim($part);
        $text = trim(mb_substr($text, $consumed));
    }

    return implode($break, $parts);
}
382
|
|
|
|
383
|
|
|
/** |
384
|
|
|
* Highlights a given phrase in a text. You can specify any expression in highlighter that |
385
|
|
|
* may include the \1 expression to include the $phrase found. |
386
|
|
|
* |
387
|
|
|
* ### Options: |
388
|
|
|
* |
389
|
|
|
* - `format` The piece of HTML with that the phrase will be highlighted |
390
|
|
|
* - `html` If true, will ignore any HTML tags, ensuring that only the correct text is highlighted |
391
|
|
|
* - `regex` a custom regex rule that is used to match words, default is '|$tag|iu' |
392
|
|
|
* |
393
|
|
|
* @param string $text Text to search the phrase in. |
394
|
|
|
* @param string|array $phrase The phrase or phrases that will be searched. |
395
|
|
|
* @param array $options An array of HTML attributes and options. |
396
|
|
|
* @return string The highlighted text |
397
|
|
|
* @link http://book.cakephp.org/3.0/en/core-libraries/string.html#highlighting-substrings |
398
|
|
|
*/ |
399
|
|
|
public static function highlight($text, $phrase, array $options = [])
{
    if (empty($phrase)) {
        return $text;
    }

    $options += [
        'format' => '<span class="highlight">\1</span>',
        'html' => false,
        'regex' => "|%s|iu"
    ];
    // Read the options explicitly. extract() would let keys such as `text`
    // or `phrase` silently overwrite the method arguments.
    $format = $options['format'];
    $html = $options['html'];
    $regex = $options['regex'];

    if (is_array($phrase)) {
        // One pattern/replacement pair per phrase; `format` may be an array
        // indexed by the same keys as $phrase.
        $replace = [];
        $with = [];

        foreach ($phrase as $key => $segment) {
            $segment = '(' . preg_quote($segment, '|') . ')';
            if ($html) {
                // Lookaheads skip matches that sit inside an HTML tag.
                $segment = "(?![^<]+>)$segment(?![^<]+>)";
            }

            $with[] = is_array($format) ? $format[$key] : $format;
            $replace[] = sprintf($regex, $segment);
        }

        return preg_replace($replace, $with, $text);
    }

    $phrase = '(' . preg_quote($phrase, '|') . ')';
    if ($html) {
        $phrase = "(?![^<]+>)$phrase(?![^<]+>)";
    }

    return preg_replace(sprintf($regex, $phrase), $format, $text);
}
437
|
|
|
|
438
|
|
|
/** |
439
|
|
|
* Strips given text of all links (<a href=....). |
440
|
|
|
* |
441
|
|
|
* @param string $text Text |
442
|
|
|
* @return string The text without links |
443
|
|
|
*/ |
444
|
|
|
public static function stripLinks($text)
{
    // First drop the closing </a> tags, then the opening <a ...> tags;
    // the text between them is left untouched.
    $withoutClosers = preg_replace('|<\/a>|im', '', $text);

    return preg_replace('|<a\s+[^>]+>|im', '', $withoutClosers);
}
448
|
|
|
|
449
|
|
|
/** |
450
|
|
|
* Truncates text starting from the end. |
451
|
|
|
* |
452
|
|
|
* Cuts a string to the length of $length and replaces the first characters |
453
|
|
|
* with the ellipsis if the text is longer than length. |
454
|
|
|
* |
455
|
|
|
* ### Options: |
456
|
|
|
* |
457
|
|
|
* - `ellipsis` Will be used as Beginning and prepended to the trimmed string |
458
|
|
|
* - `exact` If false, $text will not be cut mid-word |
459
|
|
|
* |
460
|
|
|
* @param string $text String to truncate. |
461
|
|
|
* @param int $length Length of returned string, including ellipsis. |
462
|
|
|
* @param array $options An array of options. |
463
|
|
|
* @return string Trimmed string. |
464
|
|
|
*/ |
465
|
|
|
public static function tail($text, $length = 100, array $options = [])
{
    $options += [
        'ellipsis' => '...', 'exact' => true
    ];
    // Read the options explicitly. extract() would let keys such as `text`
    // or `length` silently overwrite the method arguments.
    $ellipsis = $options['ellipsis'];
    $exact = $options['exact'];

    if (mb_strlen($text) <= $length) {
        return $text;
    }

    // Keep the last ($length - ellipsis length) characters.
    $truncate = mb_substr($text, mb_strlen($text) - $length + mb_strlen($ellipsis));
    if (!$exact) {
        // Drop the leading partial word; nothing is kept if there is no space.
        $spacepos = mb_strpos($truncate, ' ');
        $truncate = $spacepos === false ? '' : trim(mb_substr($truncate, $spacepos));
    }

    return $ellipsis . $truncate;
}
485
|
|
|
|
486
|
|
|
/** |
487
|
|
|
* Truncates text. |
488
|
|
|
* |
489
|
|
|
* Cuts a string to the length of $length and replaces the last characters |
490
|
|
|
* with the ellipsis if the text is longer than length. |
491
|
|
|
* |
492
|
|
|
* ### Options: |
493
|
|
|
* |
494
|
|
|
* - `ellipsis` Will be used as ending and appended to the trimmed string |
495
|
|
|
* - `exact` If false, $text will not be cut mid-word |
496
|
|
|
* - `html` If true, HTML tags would be handled correctly |
497
|
|
|
* |
498
|
|
|
* @param string $text String to truncate. |
499
|
|
|
* @param int $length Length of returned string, including ellipsis. |
500
|
|
|
* @param array $options An array of HTML attributes and options. |
501
|
|
|
* @return string Trimmed string. |
502
|
|
|
* @link http://book.cakephp.org/3.0/en/core-libraries/string.html#truncating-text |
503
|
|
|
*/ |
504
|
|
|
public static function truncate($text, $length = 100, array $options = [])
{
    $default = [
        'ellipsis' => '...', 'exact' => true, 'html' => false
    ];
    // In HTML mode with UTF-8 internal encoding the default ellipsis is the
    // single "…" character (U+2026).
    if (!empty($options['html']) && strtolower(mb_internal_encoding()) === 'utf-8') {
        $default['ellipsis'] = "\xe2\x80\xa6";
    }
    $options += $default;

    // Read the options explicitly. extract() would let keys such as `text`
    // or `length` silently overwrite the method arguments.
    $ellipsis = $options['ellipsis'];
    $exact = $options['exact'];
    $html = $options['html'];

    $openTags = [];

    if ($html) {
        if (mb_strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
            return $text;
        }

        // An HTML entity counts as one visible character.
        $entityPattern = '/&[0-9a-z]{2,8};|&#[0-9]{1,7};|&#x[0-9a-f]{1,6};/i';
        // Anchored so that e.g. <abbr> or <colgroup> are not mistaken for
        // the void tags <br> / <col> and left unclosed.
        $voidPattern = '/^(?:img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)$/';

        $totalLength = mb_strlen(strip_tags($ellipsis));
        $truncate = '';

        // Each match is an optional tag (1 = full tag, 2 = tag name)
        // followed by the text run up to the next tag (3).
        preg_match_all('/(<\/?([\w+]+)[^>]*>)?([^<>]*)/', $text, $tags, PREG_SET_ORDER);
        foreach ($tags as $tag) {
            if (!preg_match($voidPattern, $tag[2])) {
                if (preg_match('/<[\w]+[^>]*>/s', $tag[0])) {
                    // Opening tag: remember it, innermost first.
                    array_unshift($openTags, $tag[2]);
                } elseif (preg_match('/<\/([\w]+)[^>]*>/s', $tag[0], $closeTag)) {
                    // Closing tag: forget the matching open tag.
                    $pos = array_search($closeTag[1], $openTags, true);
                    if ($pos !== false) {
                        array_splice($openTags, $pos, 1);
                    }
                }
            }
            $truncate .= $tag[1];

            $contentLength = mb_strlen(preg_replace($entityPattern, ' ', $tag[3]));
            if ($contentLength + $totalLength > $length) {
                // This text run overflows: keep only what still fits, taking
                // care not to cut through an entity.
                $left = $length - $totalLength;
                $entitiesLength = 0;
                if (preg_match_all($entityPattern, $tag[3], $entities, PREG_OFFSET_CAPTURE)) {
                    foreach ($entities[0] as $entity) {
                        if ($entity[1] + 1 - $entitiesLength <= $left) {
                            $left--;
                            $entitiesLength += mb_strlen($entity[0]);
                        } else {
                            break;
                        }
                    }
                }

                $truncate .= mb_substr($tag[3], 0, $left + $entitiesLength);
                break;
            }

            $truncate .= $tag[3];
            $totalLength += $contentLength;

            if ($totalLength >= $length) {
                break;
            }
        }
    } else {
        if (mb_strlen($text) <= $length) {
            return $text;
        }
        $truncate = mb_substr($text, 0, $length - mb_strlen($ellipsis));
    }

    if (!$exact) {
        // Cut back to the last space so no word is split. A missing space
        // yields 0, the value `false` was coerced to in the mb_* calls.
        $spacepos = (int)mb_strrpos($truncate, ' ');
        if ($html) {
            $truncateCheck = mb_substr($truncate, 0, $spacepos);
            $lastOpenTag = mb_strrpos($truncateCheck, '<');
            $lastCloseTag = mb_strrpos($truncateCheck, '>');
            if ($lastOpenTag > $lastCloseTag) {
                // The space sits inside a tag: cut right after that tag instead.
                preg_match_all('/<[\w]+[^>]*>/s', $truncate, $lastTagMatches);
                $lastTag = array_pop($lastTagMatches[0]);
                $spacepos = mb_strrpos($truncate, $lastTag) + mb_strlen($lastTag);
            }
            // Closing tags that get cut off must be re-closed at the end.
            $bits = mb_substr($truncate, $spacepos);
            preg_match_all('/<\/([a-z]+)>/', $bits, $droppedTags, PREG_SET_ORDER);
            if (!empty($droppedTags)) {
                if (!empty($openTags)) {
                    foreach ($droppedTags as $closingTag) {
                        if (!in_array($closingTag[1], $openTags, true)) {
                            array_unshift($openTags, $closingTag[1]);
                        }
                    }
                } else {
                    foreach ($droppedTags as $closingTag) {
                        $openTags[] = $closingTag[1];
                    }
                }
            }
        }
        $truncate = mb_substr($truncate, 0, $spacepos);

        // If truncate still empty, then we don't need to count ellipsis in the cut.
        if (mb_strlen($truncate) === 0) {
            $truncate = mb_substr($text, 0, $length);
        }
    }

    $truncate .= $ellipsis;

    if ($html) {
        // Close every tag that is still open, innermost first.
        foreach ($openTags as $tag) {
            $truncate .= '</' . $tag . '>';
        }
    }

    return $truncate;
}
613
|
|
|
|
614
|
|
|
/** |
615
|
|
|
* Extracts an excerpt from the text surrounding the phrase with a number of characters on each side |
616
|
|
|
* determined by radius. |
617
|
|
|
* |
618
|
|
|
* @param string $text String to search the phrase in |
619
|
|
|
* @param string $phrase Phrase that will be searched for |
620
|
|
|
* @param int $radius The number of characters that will be returned on each side of the found phrase |
621
|
|
|
* @param string $ellipsis Ending that will be appended |
622
|
|
|
* @return string Modified string |
623
|
|
|
* @link http://book.cakephp.org/3.0/en/core-libraries/string.html#extracting-an-excerpt |
624
|
|
|
*/ |
625
|
|
|
public static function excerpt($text, $phrase, $radius = 100, $ellipsis = '...')
{
    // Without a text or a phrase there is nothing to centre on: fall back
    // to a plain truncation of twice the radius.
    if (empty($text) || empty($phrase)) {
        return static::truncate($text, $radius * 2, ['ellipsis' => $ellipsis]);
    }

    $needleLength = mb_strlen($phrase);
    $haystackLength = mb_strlen($text);

    // Case-insensitive search for the first occurrence.
    $found = mb_strpos(mb_strtolower($text), mb_strtolower($phrase));
    if ($found === false) {
        return mb_substr($text, 0, $radius) . $ellipsis;
    }

    // Left edge: clamp to the start and drop the leading ellipsis if reached.
    $lead = $ellipsis;
    $from = $found - $radius;
    if ($from <= 0) {
        $from = 0;
        $lead = '';
    }

    // Right edge: clamp to the end and drop the trailing ellipsis if reached.
    $trail = $ellipsis;
    $to = $found + $needleLength + $radius;
    if ($to >= $haystackLength) {
        $to = $haystackLength;
        $trail = '';
    }

    return $lead . mb_substr($text, $from, $to - $from) . $trail;
}
658
|
|
|
|
659
|
|
|
/** |
660
|
|
|
* Creates a comma separated list where the last two items are joined with 'and', forming natural language. |
661
|
|
|
* |
662
|
|
|
* @param array $list The list to be joined. |
663
|
|
|
* @param string $and The word used to join the last and second last items together with. Defaults to 'and'. |
|
|
|
|
664
|
|
|
* @param string $separator The separator used to join all the other items together. Defaults to ', '. |
665
|
|
|
* @return string The glued together string. |
666
|
|
|
* @link http://book.cakephp.org/3.0/en/core-libraries/string.html#converting-an-array-to-sentence-form |
667
|
|
|
*/ |
668
|
|
|
public static function toList(array $list, $and = null, $separator = ', ')
{
    // Default conjunction comes from the translation function.
    if ($and === null) {
        $and = __d('cake', 'and');
    }

    if (count($list) > 1) {
        // Take the final item off first and join the rest. This avoids the
        // null offset previously passed to array_slice(), which PHP 8.1
        // deprecates.
        $last = array_pop($list);

        return implode($separator, $list) . ' ' . $and . ' ' . $last;
    }

    // Zero or one item: array_pop() yields the item, or null for an empty list.
    return array_pop($list);
}
679
|
|
|
|
680
|
|
|
/** |
681
|
|
|
* Check if the string contain multibyte characters |
682
|
|
|
* |
683
|
|
|
* @param string $string value to test |
684
|
|
|
* @return bool |
685
|
|
|
*/ |
686
|
|
|
public static function isMultibyte($string)
{
    // True as soon as any single byte has a value above 128; the pattern
    // has no `u` modifier, so it is matched byte by byte.
    return preg_match('/[\x81-\xFF]/', $string) === 1;
}
698
|
|
|
|
699
|
|
|
/** |
700
|
|
|
* Converts a multibyte character string |
701
|
|
|
* to the decimal value of the character |
702
|
|
|
* |
703
|
|
|
* @param string $string String to convert. |
704
|
|
|
* @return array |
705
|
|
|
*/ |
706
|
|
|
public static function utf8($string)
{
    $map = [];      // decoded code points
    $values = [];   // bytes collected for the sequence in progress
    $find = 1;      // number of bytes the current sequence needs
    $length = strlen($string);

    for ($i = 0; $i < $length; $i++) {
        $value = ord($string[$i]);

        // Single-byte (ASCII) characters map to themselves.
        if ($value < 128) {
            $map[] = $value;
            continue;
        }

        // The lead byte tells how long the sequence is: below 224 means two
        // bytes, below 240 three, anything else four.
        if (empty($values)) {
            if ($value < 224) {
                $find = 2;
            } elseif ($value < 240) {
                $find = 3;
            } else {
                $find = 4;
            }
        }
        $values[] = $value;

        if (count($values) === $find) {
            // Combine the payload bits: the lead byte contributes its low
            // 5/4/3 bits, each continuation byte its low 6 bits.
            if ($find === 4) {
                $map[] = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                    + (($values[2] % 64) * 64) + ($values[3] % 64);
            } elseif ($find === 3) {
                $map[] = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
            } else {
                $map[] = (($values[0] % 32) * 64) + ($values[1] % 64);
            }
            $values = [];
            $find = 1;
        }
    }

    return $map;
}
738
|
|
|
|
739
|
|
|
/** |
740
|
|
|
* Converts the decimal value of a multibyte character string |
741
|
|
|
* to a string |
742
|
|
|
* |
743
|
|
|
* @param array $array Array |
744
|
|
|
* @return string |
745
|
|
|
*/ |
746
|
|
|
public static function ascii(array $array)
{
    $ascii = '';

    foreach ($array as $utf8) {
        $code = (int)$utf8;

        if ($code < 128) {
            // One byte: the code point itself.
            $ascii .= chr($code);
        } elseif ($code < 2048) {
            // Two bytes: 110xxxxx 10xxxxxx
            $ascii .= chr(192 + ($code >> 6));
            $ascii .= chr(128 + ($code & 63));
        } elseif ($code < 65536) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            $ascii .= chr(224 + ($code >> 12));
            $ascii .= chr(128 + (($code >> 6) & 63));
            $ascii .= chr(128 + ($code & 63));
        } else {
            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. Needed for
            // code points of 65536 and above, which do not fit three bytes.
            $ascii .= chr(240 + ($code >> 18));
            $ascii .= chr(128 + (($code >> 12) & 63));
            $ascii .= chr(128 + (($code >> 6) & 63));
            $ascii .= chr(128 + ($code & 63));
        }
    }

    return $ascii;
}
764
|
|
|
|
765
|
|
|
/** |
766
|
|
|
* Converts filesize from human readable string to bytes |
767
|
|
|
* |
768
|
|
|
* @param string $size Size in human readable string like '5MB', '5M', '500B', '50kb' etc. |
769
|
|
|
* @param mixed $default Value to be returned when invalid size was used, for example 'Unknown type' |
770
|
|
|
* @return mixed Number of bytes as integer on success, `$default` on failure if not false |
771
|
|
|
* @throws \InvalidArgumentException On invalid Unit type. |
772
|
|
|
* @link http://book.cakephp.org/3.0/en/core-libraries/helpers/text.html |
773
|
|
|
*/ |
774
|
|
|
public static function parseFileSize($size, $default = false)
{
    // ctype_digit() is only reliable on strings, so normalise first.
    $size = (string)$size;
    if (ctype_digit($size)) {
        return (int)$size;
    }
    $size = strtoupper($size);

    // Try the two-letter units first, then the one-letter ones. $l is the
    // (negative) length of the unit suffix, $i the index of the unit.
    $l = -2;
    $i = array_search(substr($size, -2), ['KB', 'MB', 'GB', 'TB', 'PB'], true);
    if ($i === false) {
        $l = -1;
        $i = array_search(substr($size, -1), ['K', 'M', 'G', 'T', 'P'], true);
    }
    if ($i !== false) {
        $number = trim(substr($size, 0, $l));
        // Only multiply a real number; 'abcMB' and the like fall through
        // to $default / the exception instead of reaching the arithmetic.
        if (is_numeric($number)) {
            return $number * pow(1024, $i + 1);
        }
    }

    // Plain byte count with a trailing "B", e.g. '500B'.
    if (substr($size, -1) === 'B' && ctype_digit(substr($size, 0, -1))) {
        return (int)substr($size, 0, -1);
    }

    if ($default !== false) {
        return $default;
    }
    throw new InvalidArgumentException('No unit type.');
}
802
|
|
|
} |
|
|
|
|
803
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.