Check if string is strict UTF-8 and, if not, encode it

Handles other ISO-encoded file types this way
This commit is contained in:
Matt Pass
2015-07-30 19:19:55 +01:00
parent e741d6c207
commit f9418db57f

View File

@@ -132,7 +132,7 @@ function xssClean($data,$type) {
function toUTF8noBOM($string,$message) {
// Attempt to detect encoding
if (function_exists('mb_detect_encoding')) {
$encType = mb_detect_encoding($string);
$strictUTF8 = mb_detect_encoding($string, 'UTF-8', true);
// Get rid of any UTF-8 BOM
$string = preg_replace('/\x{EF}\x{BB}\x{BF}/','',$string);
// Test for any bad characters
@@ -140,7 +140,7 @@ function toUTF8noBOM($string,$message) {
$teststringBroken = utf8_decode($teststring);
$teststringConverted = iconv("UTF-8", "UTF-8//IGNORE", $teststringBroken);
// If we have a matching length, UTF8 encode it
if ($encType != "ASCII" && $encType != "UTF-8" && strlen($teststringConverted) == strlen($teststringBroken)) {
if (!$strictUTF8 && strlen($teststringConverted) == strlen($teststringBroken)) {
$string = utf8_encode($string);
if ($message) {
echo "top.ICEcoder.message('".$t['Your document does...'].".');";