| @@ 104-125 (lines=22) @@ | ||
| 101 | * @return string |
|
| 102 | * @package utf8 |
|
| 103 | */ |
|
function utf8_bad_strip($str) {
    // Returns $str with every byte that is not part of a well-formed UTF-8
    // sequence removed; well-formed sequences are copied through unchanged.
    $UTF8_BAD =
    '([\x00-\x7F]'.                          # ASCII (including control chars)
    '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
    '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
    '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
    '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
    '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
    '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
    '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
    '|(.{1}))';                              # invalid byte

    // A: every match has to begin exactly at $offset, so the scan can never
    //    skip over input.
    // s: "." matches any byte, so the catch-all alternative cannot fail.
    // S: kept from the original pattern.
    $pattern = '/'.$UTF8_BAD.'/AsS';

    $str = (string) $str;
    $length = strlen($str);
    $offset = 0;
    $result = '';

    // Walk the input by offset and build the result in a local string.
    // This avoids re-copying the remaining input after every character and
    // keeps the function independent of the output buffering state.
    while ($offset < $length && preg_match($pattern, $str, $matches, 0, $offset)) {
        // Group 2 is only present when the catch-all "invalid byte"
        // alternative matched; anything else is a well-formed sequence.
        if (!isset($matches[2])) {
            $result .= $matches[0];
        }
        $offset += strlen($matches[0]);
    }

    return $result;
}
|
| 126 | ||
| 127 | //-------------------------------------------------------------------- |
|
| 128 | /** |
|
| @@ 140-163 (lines=24) @@ | ||
| 137 | * @return string |
|
| 138 | * @package utf8 |
|
| 139 | */ |
|
function utf8_bad_replace($str, $replace = '?') {
    // Returns $str with every byte that is not part of a well-formed UTF-8
    // sequence replaced by $replace (inserted literally, once per invalid
    // byte); well-formed sequences are copied through unchanged.
    $UTF8_BAD =
    '([\x00-\x7F]'.                          # ASCII (including control chars)
    '|[\xC2-\xDF][\x80-\xBF]'.               # non-overlong 2-byte
    '|\xE0[\xA0-\xBF][\x80-\xBF]'.           # excluding overlongs
    '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'.    # straight 3-byte
    '|\xED[\x80-\x9F][\x80-\xBF]'.           # excluding surrogates
    '|\xF0[\x90-\xBF][\x80-\xBF]{2}'.        # planes 1-3
    '|[\xF1-\xF3][\x80-\xBF]{3}'.            # planes 4-15
    '|\xF4[\x80-\x8F][\x80-\xBF]{2}'.        # plane 16
    '|(.{1}))';                              # invalid byte

    // A: every match has to begin exactly at $offset, so the scan can never
    //    skip over input.
    // s: "." matches any byte, so the catch-all alternative cannot fail.
    // S: kept from the original pattern.
    $pattern = '/'.$UTF8_BAD.'/AsS';

    $str = (string) $str;
    $length = strlen($str);
    $offset = 0;
    $result = '';

    // Walk the input by offset and build the result in a local string.
    // This avoids re-copying the remaining input after every character and
    // keeps the function independent of the output buffering state.
    while ($offset < $length && preg_match($pattern, $str, $matches, 0, $offset)) {
        // Group 2 is only present when the catch-all "invalid byte"
        // alternative matched; anything else is a well-formed sequence.
        if (!isset($matches[2])) {
            $result .= $matches[0];
        } else {
            $result .= $replace;
        }
        $offset += strlen($matches[0]);
    }

    return $result;
}
|
| 164 | ||
| 165 | //-------------------------------------------------------------------- |
|
| 166 | /** |
|