1
|
|
|
<?php |
2
|
|
|
/** |
3
|
|
|
* @package utf8 |
4
|
|
|
*/ |
5
|
|
|
|
6
|
|
|
/**
* Ensure the UTF8_CORE marker constant exists.
* It is defined as TRUE only when no definition is present yet, so an
* earlier definition is never overwritten.
*/
defined('UTF8_CORE') || define('UTF8_CORE', TRUE);
12
|
|
|
|
13
|
|
|
//-------------------------------------------------------------------- |
14
|
|
|
/**
* Count the characters of a string by delegating to mb_strlen.
* No encoding argument is passed, so the result depends on the mbstring
* internal encoding - set it to UTF-8 (mb_internal_encoding) beforehand.
* NOTE(review): earlier documentation stated that invalid bytes are not
* counted; that is decided by mb_strlen itself - confirm for the PHP
* version in use.
* @param string UTF-8 string
* @return int number of characters as reported by mb_strlen
* @package utf8
*/
function utf8_strlen($str){
    $count = mb_strlen($str);
    return $count;
}
26
|
|
|
|
27
|
|
|
|
28
|
|
|
//-------------------------------------------------------------------- |
29
|
|
|
/**
* Find the position of the first occurrence of a string (wrapper around
* mb_strpos).
* Assumes the mbstring internal encoding is set to UTF-8.
* @param string haystack
* @param string needle (you should validate this with utf8_is_valid)
* @param integer (optional) offset in characters, counted from the left;
*                FALSE (the default) means "start at the beginning"
* @return mixed integer position or FALSE when the needle is not found
* @package utf8
*/
function utf8_strpos($str, $search, $offset = FALSE){
    # FALSE marks "no offset supplied"; starting at character 0 is the
    # same search mb_strpos performs when the argument is omitted.
    $start = ( $offset === FALSE ) ? 0 : $offset;

    return mb_strpos($str, $search, $start);
}
46
|
|
|
|
47
|
|
|
//-------------------------------------------------------------------- |
48
|
|
|
/**
* Find the position of the last occurrence of a string (wrapper around
* mb_strrpos).
* Assumes the mbstring internal encoding is set to UTF-8.
*
* Without an offset the whole haystack is searched. With an integer offset
* the first $offset characters are cut off (mb_substr), the remainder is
* searched, and the offset is added back so the result refers to the
* original string.
*
* NOTE(review): a negative offset is handed to mb_substr unchanged and then
* added to the match position, so the returned value is only meaningful
* for offsets >= 0 - confirm the intended semantics before relying on
* negative offsets.
*
* @param string haystack
* @param string needle (you should validate this with utf8_is_valid)
* @param integer (optional) offset in characters (from left); a non-integer
*                value other than FALSE raises E_USER_WARNING
* @return mixed integer position or FALSE on failure
* @package utf8
*/
function utf8_strrpos($str, $search, $offset = FALSE){
    if ( $offset === FALSE ) {
        # Emulate behaviour of strrpos for an empty haystack. Strict
        # comparisons are used instead of empty(): empty('0') is TRUE, which
        # made the valid haystack '0' look empty and report "not found".
        if ( $str === '' || $str === NULL || $str === FALSE ) {
            return FALSE;
        }
        return mb_strrpos($str, $search);
    }

    if ( !is_int($offset) ) {
        trigger_error('utf8_strrpos expects parameter 3 to be long',E_USER_WARNING);
        return FALSE;
    }

    # Search only the part of the haystack that starts at the offset ...
    $tail = mb_substr($str, $offset);
    $pos = mb_strrpos($tail, $search);

    if ( $pos === FALSE ) {
        return FALSE;
    }

    # ... and translate the match back into the original string's positions.
    return $pos + $offset;
}
80
|
|
|
|
81
|
|
|
//-------------------------------------------------------------------- |
82
|
|
|
/**
* Return part of a string, addressed by character offset and, optionally,
* a character length (wrapper around mb_substr).
* Assumes the mbstring internal encoding is set to UTF-8.
* @param string
* @param integer number of UTF-8 characters offset (from left)
* @param integer (optional) length in UTF-8 characters from offset; FALSE
*                (the default) means "up to the end of the string"
* @return mixed the value returned by mb_substr (a string on success)
* @package utf8
*/
function utf8_substr($str, $offset, $length = FALSE){
    # An explicit length is forwarded as given.
    if ( $length !== FALSE ) {
        return mb_substr($str, $offset, $length);
    }

    # No length supplied: call mb_substr without the argument so its own
    # default ("rest of the string") applies.
    return mb_substr($str, $offset);
}
99
|
|
|
|
100
|
|
|
//-------------------------------------------------------------------- |
101
|
|
|
/**
* Make a string lowercase (wrapper around mb_strtolower).
* Assumes the mbstring internal encoding is set to UTF-8.
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
* NOTE(review): earlier documentation promised FALSE for invalid UTF-8;
* this wrapper performs no validation and returns whatever mb_strtolower
* returns - confirm before relying on a FALSE result.
* @param string
* @return mixed the lowercased string as returned by mb_strtolower
* @package utf8
*/
function utf8_strtolower($str){
    $lowered = mb_strtolower($str);
    return $lowered;
}
116
|
|
|
|
117
|
|
|
//-------------------------------------------------------------------- |
118
|
|
|
/**
* Make a string uppercase (wrapper around mb_strtoupper).
* Assumes the mbstring internal encoding is set to UTF-8.
* Note: The concept of a character's "case" only exists in some alphabets
* such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
* not exist in the Chinese alphabet, for example. See Unicode Standard
* Annex #21: Case Mappings
* NOTE(review): earlier documentation promised FALSE for invalid UTF-8;
* this wrapper performs no validation and returns whatever mb_strtoupper
* returns - confirm before relying on a FALSE result.
* @param string
* @return mixed the uppercased string as returned by mb_strtoupper
* @package utf8
*/
function utf8_strtoupper($str){
    $raised = mb_strtoupper($str);
    return $raised;
}
133
|
|
|
|
Duplicated code is one of the most pungent code smells. If you need to duplicate the same code in three or more different places, we strongly encourage you to look into extracting the code into a single class or operation.
You can also find more detailed suggestions in the “Code” section of your repository.