|
1
|
|
|
<?php defined('SYSPATH') or die('No direct access allowed.'); |
|
2
|
|
|
/**
 * Text helper class.
 *
 * A collection of static helpers for truncating, generating, censoring,
 * auto-linking and formatting strings.
 *
 * $Id: text.php 3769 2008-12-15 00:48:56Z zombor $
 *
 * @package    Core
 * @author     Kohana Team
 * @copyright  (c) 2007-2008 Kohana Team
 * @license    http://kohanaphp.com/license.html
 */
class text_Core
{
    /**
     * Limits a phrase to a given number of words.
     *
     * Leading whitespace is kept, trailing whitespace of the truncated result
     * is removed, and the end character is appended only when something was
     * actually cut off.
     *
     * @param   string   phrase to limit words of
     * @param   integer  number of words to limit to
     * @param   string   end character or entity (defaults to an ellipsis)
     * @return  string   the blank input unchanged, the end character alone when
     *                   the limit is <= 0, otherwise the truncated phrase
     */
    public static function limit_words($str, $limit = 100, $end_char = null)
    {
        $limit = (int) $limit;
        $end_char = ($end_char === null) ? '…' : $end_char;

        if (trim($str) === '') {
            return $str;
        }

        if ($limit <= 0) {
            return $end_char;
        }

        // With the "u" modifier preg_match() fails on malformed UTF-8 and
        // leaves $matches empty; hand the input back instead of reading an
        // undefined offset.
        if (! preg_match('/^\s*+(?:\S++\s*+){1,'.$limit.'}/u', $str, $matches)) {
            return $str;
        }

        // Only attach the end character if the matched string is shorter
        // than the starting string.
        return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
    }

    /**
     * Limits a phrase to a given number of characters.
     *
     * @param   string   phrase to limit characters of
     * @param   integer  number of characters to limit to
     * @param   string   end character or entity (defaults to an ellipsis)
     * @param   boolean  when true, the word that straddles the limit is kept
     *                   whole instead of being cut
     * @return  string   the input unchanged when it is blank or already short
     *                   enough, the end character alone when the limit is <= 0,
     *                   otherwise the truncated phrase
     */
    public static function limit_chars($str, $limit = 100, $end_char = null, $preserve_words = false)
    {
        $end_char = ($end_char === null) ? '…' : $end_char;

        $limit = (int) $limit;

        if (trim($str) === '' || utf8::strlen($str) <= $limit) {
            return $str;
        }

        if ($limit <= 0) {
            return $end_char;
        }

        if (! $preserve_words) {
            return rtrim(utf8::substr($str, 0, $limit)).$end_char;
        }

        // Take ($limit - 1) characters, then run on to the end of the current word.
        // A failed match (e.g. malformed UTF-8) returns the input untouched.
        if (! preg_match('/^.{'.($limit - 1).'}\S*/us', $str, $matches)) {
            return $str;
        }

        return rtrim($matches[0]).(strlen($matches[0]) === strlen($str) ? '' : $end_char);
    }

    /**
     * Alternates between two or more strings.
     *
     * Each call returns the next argument in turn. The position counter is a
     * single static variable shared by every call site; calling the method
     * without arguments resets it.
     *
     * @param   string  strings to alternate between
     * @return  string  the selected argument, or '' when called without arguments
     */
    public static function alternate()
    {
        static $i = 0;

        if (func_num_args() === 0) {
            // Reset the cycle
            $i = 0;

            return '';
        }

        $args = func_get_args();

        return $args[($i++ % count($args))];
    }

    /**
     * Generates a random string of a given type and length.
     *
     * Characters are picked with mt_rand(), which is not a cryptographically
     * secure generator; do not rely on the result for security-sensitive tokens.
     *
     * @param   string   a type of pool, or a string of characters to use as the pool
     * @param   integer  length of string to return
     * @return  string
     *
     * @tutorial  alnum     alpha-numeric characters
     * @tutorial  alpha     alphabetical characters
     * @tutorial  hexdec    hexadecimal characters, 0-9 plus a-f
     * @tutorial  numeric   digit characters, 0-9
     * @tutorial  nozero    digit characters, 1-9
     * @tutorial  distinct  clearly distinct alpha-numeric characters
     */
    public static function random($type = 'alnum', $length = 8)
    {
        $utf8 = false;

        switch ($type) {
            case 'alnum':
                $pool = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
                break;
            case 'alpha':
                $pool = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
                break;
            case 'hexdec':
                $pool = '0123456789abcdef';
                break;
            case 'numeric':
                $pool = '0123456789';
                break;
            case 'nozero':
                $pool = '123456789';
                break;
            case 'distinct':
                $pool = '2345679ACDEFHJKLMNPRSTUVWXYZ';
                break;
            default:
                // Anything else is treated as a custom pool of characters
                $pool = (string) $type;
                $utf8 = ! utf8::is_ascii($pool);
                break;
        }

        // Nothing to pick from: str_split('') yields an empty array on
        // PHP >= 8.2, which would make mt_rand(0, -1) below throw.
        if ($pool === '') {
            return '';
        }

        // Split the pool into an array of characters
        $pool = ($utf8 === true) ? utf8::str_split($pool, 1) : str_split($pool, 1);

        // Largest pool key
        $max = count($pool) - 1;

        $str = '';
        for ($i = 0; $i < $length; $i++) {
            // Select a random character from the pool and add it to the string
            $str .= $pool[mt_rand(0, $max)];
        }

        // Make sure alnum strings contain at least one letter and one digit
        if ($type === 'alnum' && $length > 1) {
            if (ctype_alpha($str)) {
                // Overwrite a random position with a random digit (ASCII 48-57)
                $str[mt_rand(0, $length - 1)] = chr(mt_rand(48, 57));
            } elseif (ctype_digit($str)) {
                // Overwrite a random position with a random uppercase letter (ASCII 65-90)
                $str[mt_rand(0, $length - 1)] = chr(mt_rand(65, 90));
            }
        }

        return $str;
    }

    /**
     * Reduces multiple slashes in a string to single slashes.
     *
     * A run of slashes directly preceded by a colon (as in "http://") is left alone.
     *
     * @param   string  string to reduce slashes of
     * @return  string
     */
    public static function reduce_slashes($str)
    {
        return preg_replace('#(?<!:)//+#', '/', $str);
    }

    /**
     * Replaces the given words with a string.
     *
     * An asterisk inside a bad word acts as a wildcard for any run of
     * non-whitespace characters. Matching is case-insensitive. A replacement
     * that is exactly one character long is repeated to the length of each
     * matched word; a longer replacement is inserted once per match.
     *
     * NOTE(review): as written, passing true for $replace_partial_words
     * restricts matches to whole words (word boundary / whitespace on both
     * sides) and the default false also replaces matches inside longer words.
     * The parameter name suggests the opposite - confirm the intent before
     * changing it, since callers may depend on the current behavior.
     *
     * @param   string   phrase to replace words in
     * @param   array    words to replace (a single word may be given as a string)
     * @param   string   replacement string
     * @param   boolean  when true, only replace matches delimited by word boundaries
     * @return  string
     */
    public static function censor($str, $badwords, $replacement = '#', $replace_partial_words = false)
    {
        // Build the alternatives in a separate array so that a string passed
        // as $badwords is never written to by offset.
        $patterns = array();
        foreach ((array) $badwords as $badword) {
            $patterns[] = str_replace('\*', '\S*?', preg_quote((string) $badword, '!'));
        }

        // An empty alternation would match the empty string at every position
        if ($patterns === array()) {
            return $str;
        }

        $regex = '('.implode('|', $patterns).')';

        if ($replace_partial_words == true) {
            // Just using \b isn't sufficient when we need to replace a badword that already contains word boundaries itself
            $regex = '(?<=\b|\s|^)'.$regex.'(?=\b|\s|$)';
        }

        $regex = '!'.$regex.'!ui';

        if (utf8::strlen($replacement) == 1) {
            // Repeat the single replacement character once per matched character.
            // A callback is used because the PCRE "e" (eval) modifier no longer
            // exists in PHP 7+ and miscounted matches containing quotes.
            return preg_replace_callback(
                $regex,
                function ($match) use ($replacement) {
                    return str_repeat($replacement, utf8::strlen($match[1]));
                },
                $str
            );
        }

        return preg_replace($regex, $replacement, $str);
    }

    /**
     * Finds the text that is similar between a set of words.
     *
     * Returns the longest prefix shared by every word. The comparison is done
     * byte by byte.
     *
     * @param   array   words to find similar text of
     * @return  string
     */
    public static function similar(array $words)
    {
        // First word is the word to match against
        $word = current($words);

        for ($i = 0, $max = strlen($word); $i < $max; ++$i) {
            foreach ($words as $w) {
                // Once a difference is found, break out of the loops
                if (! isset($w[$i]) || $w[$i] !== $word[$i]) {
                    break 2;
                }
            }
        }

        // $i is now the length of the common prefix
        return substr($word, 0, $i);
    }

    /**
     * Converts text email addresses and anchors into links.
     *
     * @param   string  text to auto link
     * @return  string
     */
    public static function auto_link($text)
    {
        // Auto link emails first to prevent problems with addresses such as
        // "www.domain.com@example.com", whose local part looks like a URL
        return text::auto_link_urls(text::auto_link_emails($text));
    }

    /**
     * Converts text anchors into links.
     *
     * Handles http/https/ftp/ftps URLs as well as naked "www." host names,
     * skipping anything that already sits inside an html anchor.
     *
     * @param   string  text to auto link
     * @return  string
     */
    public static function auto_link_urls($text)
    {
        // Finds all http/https/ftp/ftps links that are not part of an existing html anchor.
        // Every occurrence is replaced exactly once, so a URL that appears
        // several times in the text is never wrapped twice.
        $text = self::replace_matches(
            '~\b(?<!href="|">)(?:ht|f)tps?://\S+(?:/|\b)~i',
            function ($match) {
                return html::anchor($match[0]);
            },
            $text
        );

        // Find all naked www.links.com (without http://)
        $text = self::replace_matches(
            '~\b(?<!://)www(?:\.[a-z0-9][-a-z0-9]*+)+\.[a-z]{2,6}\b~i',
            function ($match) {
                return html::anchor('http://'.$match[0], $match[0]);
            },
            $text
        );

        return $text;
    }

    /**
     * Converts text email addresses into links.
     *
     * @param   string  text to auto link
     * @return  string
     */
    public static function auto_link_emails($text)
    {
        // Finds all email addresses that are not part of an existing html mailto anchor
        // Note: The "58;" negative lookbehind prevents matching of existing encoded html mailto anchors
        //       The html entity for a colon (:) is &#58; or &#058; or &#0058; etc.
        return self::replace_matches(
            '~\b(?<!href="mailto:|">|58;)(?!\.)[-+_a-z0-9.]++(?<!\.)@(?![-.])[-a-z0-9.]+(?<!\.)\.[a-z]{2,6}\b~i',
            function ($match) {
                // Replace each email with an encoded mailto
                return html::mailto($match[0]);
            },
            $text
        );
    }

    /**
     * Replaces every match of a pattern through a callback in a single pass.
     *
     * @param   string    PCRE pattern
     * @param   callback  receives the match array, returns the replacement
     * @param   string    subject text
     * @return  string    the rewritten text, or the subject unchanged when the
     *                    regex engine reports an error
     */
    protected static function replace_matches($pattern, $callback, $text)
    {
        $result = preg_replace_callback($pattern, $callback, $text);

        // preg_replace_callback() returns null on a PCRE error
        return ($result === null) ? $text : $result;
    }

    /**
     * Automatically applies <p> and <br /> markup to text. Basically nl2br() on steroids.
     *
     * Blocks separated by two or more newlines become paragraphs, single
     * newlines become <br />, and block-level html elements are not wrapped
     * in paragraphs.
     *
     * @param   string  subject
     * @return  string  the marked-up text, or '' for blank input
     */
    public static function auto_p($str)
    {
        // Trim whitespace
        $str = trim($str);
        if ($str === '') {
            return '';
        }

        // Standardize newlines
        $str = str_replace(array("\r\n", "\r"), "\n", $str);

        // Trim whitespace on each line
        $str = preg_replace('~^[ \t]+~m', '', $str);
        $str = preg_replace('~[ \t]+$~m', '', $str);

        // The html-specific regexes only need to be executed if the string contains html
        $html_found = (strpos($str, '<') !== false);

        // Elements that should not be surrounded by p tags
        $no_p = '(?:p|div|h[1-6r]|ul|ol|li|blockquote|d[dlt]|pre|t[dhr]|t(?:able|body|foot|head)|c(?:aption|olgroup)|form|s(?:elect|tyle)|a(?:ddress|rea)|ma(?:p|th))';

        if ($html_found) {
            // Put at least two linebreaks before and after $no_p elements
            $str = preg_replace('~^<'.$no_p.'[^>]*+>~im', "\n$0", $str);
            $str = preg_replace('~</'.$no_p.'\s*+>$~im', "$0\n", $str);
        }

        // Do the <p> magic!
        $str = '<p>'.trim($str).'</p>';
        $str = preg_replace('~\n{2,}~', "</p>\n\n<p>", $str);

        if ($html_found) {
            // Remove p tags around $no_p elements
            $str = preg_replace('~<p>(?=</?'.$no_p.'[^>]*+>)~i', '', $str);
            $str = preg_replace('~(</?'.$no_p.'[^>]*+>)</p>~i', '$1', $str);
        }

        // Convert single linebreaks to <br />
        $str = preg_replace('~(?<!\n)\n(?!\n)~', "<br />\n", $str);

        return $str;
    }

    /**
     * Returns human readable sizes.
     *
     * @see  Based on original functions written by:
     * @see  Aidan Lister: http://aidanlister.com/repos/v/function.size_readable.php
     * @see  Quentin Zervaas: http://www.phpriot.com/d/code/strings/filesize-format/
     *
     * @param   integer  size in bytes
     * @param   string   a definitive unit (e.g. "MB" or "MiB"); a unit containing
     *                   an "i" selects the IEC table regardless of $si
     * @param   string   the return string format, passed to sprintf() with the
     *                   scaled value and the unit (default '%01.2f %s')
     * @param   boolean  whether to use SI prefixes (powers of 1000) or IEC
     *                   prefixes (powers of 1024)
     * @return  string
     */
    public static function bytes($bytes, $force_unit = null, $format = null, $si = true)
    {
        // Format string
        $format = ($format === null) ? '%01.2f %s' : (string) $format;

        // Normalise once so that null never reaches strpos()
        $force_unit = (string) $force_unit;

        if ($si == false || strpos($force_unit, 'i') !== false) {
            // IEC prefixes (binary)
            $units = array('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB');
            $mod = 1024;
        } else {
            // SI prefixes (decimal)
            $units = array('B', 'kB', 'MB', 'GB', 'TB', 'PB');
            $mod = 1000;
        }

        // Determine unit to use
        $power = array_search($force_unit, $units, true);
        if ($power === false) {
            // Pick the largest unit that keeps the value >= 1, never going past
            // the last known unit. Comparing against exact powers avoids the
            // rounding error of floor(log()) and keeps values below 1 in bytes.
            $power = 0;
            $last = count($units) - 1;
            while ($power < $last && $bytes >= pow($mod, $power + 1)) {
                ++$power;
            }
        }

        return sprintf($format, $bytes / pow($mod, $power), $units[$power]);
    }

    /**
     * Prevents widow words by inserting a non-breaking space between the last two words.
     *
     * @see  http://www.shauninman.com/archive/2006/08/22/widont_wordpress_plugin
     *
     * @param   string  string to remove widows from
     * @return  string  the right-trimmed string with its last space replaced
     */
    public static function widont($str)
    {
        $str = rtrim($str);
        $space = strrpos($str, ' ');

        if ($space !== false) {
            // "\xC2\xA0" is U+00A0 NO-BREAK SPACE in UTF-8, spelled out as an
            // escape so it cannot be mistaken for (or normalised to) a plain space.
            $str = substr($str, 0, $space)."\xC2\xA0".substr($str, $space + 1);
        }

        return $str;
    }
} // End text
|
392
|
|
|
|
When comparing two booleans, it is generally considered safer to use the strict comparison operator.