|
@@ 1312-1317 (lines=6) @@
|
| 1309 |
|
if (function_exists('get_last_retrieve_url_contents_content_type')) { |
| 1310 |
|
$contentTypeHeader = get_last_retrieve_url_contents_content_type(); |
| 1311 |
|
$success = preg_match('/charset=(.+)/', $contentTypeHeader, $matches); |
| 1312 |
|
if ($success) { |
| 1313 |
|
$charset = $matches[1]; |
| 1314 |
|
if (is_object($debug_object)) { |
| 1315 |
|
$debug_object->debugLog(2, 'header content-type found charset of: ' . $charset); |
| 1316 |
|
} |
| 1317 |
|
} |
| 1318 |
|
} |
| 1319 |
|
|
| 1320 |
|
if (empty($charset)) { |
|
@@ 1330-1338 (lines=9) @@
|
| 1327 |
|
|
| 1328 |
|
if (!empty($fullvalue)) { |
| 1329 |
|
$success = preg_match('/charset=(.+)/', $fullvalue, $matches); |
| 1330 |
|
if ($success) { |
| 1331 |
|
$charset = $matches[1]; |
| 1332 |
|
} else { |
| 1333 |
|
// If there is a meta tag, and they don't specify the character set, research says that it's typically ISO-8859-1 |
| 1334 |
|
if (is_object($debug_object)) { |
| 1335 |
|
$debug_object->debugLog(2, 'meta content-type tag couldn\'t be parsed. using iso-8859 default.'); |
| 1336 |
|
} |
| 1337 |
|
$charset = 'ISO-8859-1'; |
| 1338 |
|
} |
| 1339 |
|
} |
| 1340 |
|
} |
| 1341 |
|
} |
|
@@ 1352-1357 (lines=6) @@
|
| 1349 |
|
} |
| 1350 |
|
|
| 1351 |
|
// And if this doesn't work either, we just wrongheadedly assume it's UTF-8 so that we can move on, because this will usually give us most of what we need. |
| 1352 |
|
if ($charset === false) { |
| 1353 |
|
if (is_object($debug_object)) { |
| 1354 |
|
$debug_object->debugLog(2, 'since mb_detect failed - using default of utf-8'); |
| 1355 |
|
} |
| 1356 |
|
$charset = 'UTF-8'; |
| 1357 |
|
} |
| 1358 |
|
} |
| 1359 |
|
|
| 1360 |
|
// Since CP1252 is a superset, if we get one of its subsets, we want CP1252 instead. |