@@ 104-125 (lines=22) @@ | ||
101 | * @return string |
|
102 | * @package utf8 |
|
103 | */ |
|
function utf8_bad_strip($str) {
    // Returns a copy of $str in which every byte that is not part of a
    // well-formed UTF-8 sequence has been dropped. Well-formed sequences
    // (including ASCII control characters) are copied through unchanged.
    $str = (string) $str;

    // Group 1 wraps the whole alternation. Group 2 is only present in
    // $matches when the final fallback branch consumed a single byte that
    // fits none of the well-formed sequences listed before it.
    // \G pins every match to the offset handed to preg_match(), so the
    // string is walked strictly left to right without ever being re-sliced.
    $UTF8_BAD =
    '\G([\x00-\x7F]'.                      # ASCII (including control chars)
    '|[\xC2-\xDF][\x80-\xBF]'.             # non-overlong 2-byte
    '|\xE0[\xA0-\xBF][\x80-\xBF]'.         # excluding overlongs
    '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.  # straight 3-byte
    '|\xED[\x80-\x9F][\x80-\xBF]'.         # excluding surrogates
    '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.      # planes 1-3
    '|[\xF1-\xF3][\x80-\xBF]{3}'.          # planes 4-15
    '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.      # plane 16
    '|(.{1}))';                            # invalid byte

    // The "s" modifier lets the fallback "." match every byte value, even
    // one the PCRE build treats as a newline, so a match is always found
    // while input remains and no tail of the string is silently lost.
    $pattern = '/'.$UTF8_BAD.'/sS';

    $result = '';
    $length = strlen($str);
    $offset = 0;

    // Plain concatenation replaces the former output-buffer capture, which
    // cannot be used from inside an output handler. If preg_match() reports
    // an internal error the loop stops and what was collected is returned.
    while ($offset < $length && preg_match($pattern, $str, $matches, 0, $offset)) {
        if (!isset($matches[2])) {
            // Well-formed sequence: keep it.
            $result .= $matches[0];
        }
        // Step over whatever was consumed, whether it was kept or dropped.
        $offset += strlen($matches[0]);
    }

    return $result;
}
|
126 | ||
127 | //-------------------------------------------------------------------- |
|
128 | /** |
|
@@ 140-163 (lines=24) @@ | ||
137 | * @return string |
|
138 | * @package utf8 |
|
139 | */ |
|
function utf8_bad_replace($str, $replace = '?') {
    // Returns a copy of $str in which every byte that is not part of a
    // well-formed UTF-8 sequence has been swapped for $replace (one
    // replacement per invalid byte). Well-formed sequences, including
    // ASCII control characters, are copied through unchanged.
    $str = (string) $str;

    // Group 1 wraps the whole alternation. Group 2 is only present in
    // $matches when the final fallback branch consumed a single byte that
    // fits none of the well-formed sequences listed before it.
    // \G pins every match to the offset handed to preg_match(), so the
    // string is walked strictly left to right without ever being re-sliced.
    $UTF8_BAD =
    '\G([\x00-\x7F]'.                      # ASCII (including control chars)
    '|[\xC2-\xDF][\x80-\xBF]'.             # non-overlong 2-byte
    '|\xE0[\xA0-\xBF][\x80-\xBF]'.         # excluding overlongs
    '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.  # straight 3-byte
    '|\xED[\x80-\x9F][\x80-\xBF]'.         # excluding surrogates
    '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.      # planes 1-3
    '|[\xF1-\xF3][\x80-\xBF]{3}'.          # planes 4-15
    '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.      # plane 16
    '|(.{1}))';                            # invalid byte

    // The "s" modifier lets the fallback "." match every byte value, even
    // one the PCRE build treats as a newline, so a match is always found
    // while input remains and no tail of the string is silently lost.
    $pattern = '/'.$UTF8_BAD.'/sS';

    $result = '';
    $length = strlen($str);
    $offset = 0;

    // Plain concatenation replaces the former output-buffer capture, which
    // cannot be used from inside an output handler. If preg_match() reports
    // an internal error the loop stops and what was collected is returned.
    while ($offset < $length && preg_match($pattern, $str, $matches, 0, $offset)) {
        if (!isset($matches[2])) {
            // Well-formed sequence: keep it.
            $result .= $matches[0];
        } else {
            // Invalid byte: emit the replacement verbatim in its place.
            $result .= $replace;
        }
        // Step over whatever was consumed, whether it was kept or replaced.
        $offset += strlen($matches[0]);
    }

    return $result;
}
|
164 | ||
165 | //-------------------------------------------------------------------- |
|
166 | /** |