Replace only first BOM, include ending if needed

This commit is contained in:
Matt Pass
2016-05-05 15:35:43 +01:00
parent e8ef9e5195
commit d686faf8a7

View File

@@ -133,8 +133,18 @@ function toUTF8noBOM($string,$message) {
// Attempt to detect encoding
if (function_exists('mb_detect_encoding')) {
$strictUTF8 = mb_detect_encoding($string, 'UTF-8', true);
// Get rid of any UTF-8 BOM, need to replace with a line return
$string = preg_replace("/\x{EF}\x{BB}\x{BF}/",PHP_EOL,$string);
// Build the 3-byte UTF-8 BOM sequence (EF BB BF) to compare against
$bom = pack("CCC", 0xef, 0xbb, 0xbf);
// If the string starts with the BOM (i.e. its first 3 bytes match)
if (0 === strncmp($string, $bom, 3)) {
// If the BOM is immediately followed by a CRLF, chop the BOM off and prefix the string with a PHP_EOL
if (0 === strncmp($string, $bom."\r\n", 5)) {
$string = PHP_EOL.substr($string, 3);
// Else, simply chop off the BOM
} else {
$string = substr($string, 3);
}
}
// Test for any bad characters
$teststring = $string;