diff --git a/LZCompressor/LZContext.php b/LZCompressor/LZContext.php
new file mode 100644
index 0000000..0e41a3e
--- /dev/null
+++ b/LZCompressor/LZContext.php
@@ -0,0 +1,91 @@
+data = new LZData;
+    }
+
+    // Helper
+
+    /**
+     * Tells whether $val is already a key of the phrase dictionary.
+     *
+     * @param string $val
+     * @return bool
+     */
+    public function dictionaryContains($val) {
+        return array_key_exists($val, $this->dictionary);
+    }
+
+    /**
+     * Registers $val in the phrase dictionary under the current dictSize
+     * and then advances dictSize by one.
+     *
+     * @param string $val
+     */
+    public function addToDictionary($val) {
+        $this->dictionary[$val] = $this->dictSize++;
+    }
+
+    /**
+     * Tells whether $val is still flagged in dictionaryToCreate.
+     *
+     * @param string $val
+     * @return bool
+     */
+    public function dictionaryToCreateContains($val) {
+        return array_key_exists($val, $this->dictionaryToCreate);
+    }
+
+    /**
+     * Decrements enlargeIn; when it drops to 0 it is reset to 2^numBits
+     * and numBits is incremented.
+     */
+    public function enlargeIn() {
+        $this->enlargeIn--;
+        if($this->enlargeIn==0) {
+            $this->enlargeIn = pow(2, $this->numBits);
+            $this->numBits++;
+        }
+    }
+}
diff --git a/LZCompressor/LZData.php b/LZCompressor/LZData.php
new file mode 100644
index 0000000..b536ce1
--- /dev/null
+++ b/LZCompressor/LZData.php
@@ -0,0 +1,29 @@
+str .= $str;
+    }
+}
diff --git a/LZCompressor/LZReverseDictionary.php b/LZCompressor/LZReverseDictionary.php
new file mode 100644
index 0000000..264caa4
--- /dev/null
+++ b/LZCompressor/LZReverseDictionary.php
@@ -0,0 +1,33 @@
+entries);
+    }
+
+    /**
+     * Tells whether an entry is stored under $index.
+     *
+     * @param int $index
+     * @return bool
+     */
+    public function hasEntry($index) {
+        return array_key_exists($index, $this->entries);
+    }
+
+    /**
+     * Returns the entry stored under $index. No existence check is done
+     * here; callers are expected to consult hasEntry() first.
+     *
+     * @param int $index
+     * @return mixed
+     */
+    public function getEntry($index) {
+        return $this->entries[$index];
+    }
+
+    /**
+     * Appends $char as a new entry at the next free index.
+     *
+     * @param string $char
+     */
+    public function addEntry($char) {
+        $this->entries[] = $char;
+    }
+
+}
\ No newline at end of file
diff --git a/LZCompressor/LZString.php b/LZCompressor/LZString.php
new file mode 100644
index 0000000..75791fa
--- /dev/null
+++ b/LZCompressor/LZString.php
@@ -0,0 +1,286 @@
+c = LZUtil::utf8_charAt($uncompressed, $ii);
+            if(!$context->dictionaryContains($context->c)) {
+                $context->addToDictionary($context->c);
+                $context->dictionaryToCreate[$context->c] = true;
+            }
+            $context->wc = $context->w . $context->c;
+            if($context->dictionaryContains($context->wc)) {
+                $context->w = $context->wc;
+            } else {
+                self::produceW($context, $bitsPerChar, $getCharFromInt);
+            }
+        }
+        if($context->w !== '') {
+            self::produceW($context, $bitsPerChar, $getCharFromInt);
+        }
+
+        $value = 2;
+        for($i=0; $i<$context->numBits; $i++) {
+            self::writeBit($value&1, $context->data, $bitsPerChar, $getCharFromInt);
+            $value = $value >> 1;
+        }
+
+        while (true) {
+            $context->data->val = $context->data->val << 1;
+            if ($context->data->position == ($bitsPerChar-1)) {
+                $context->data->append($getCharFromInt($context->data->val));
+                break;
+            }
+            $context->data->position++;
+        }
+
+        return $context->data->str;
+    }
+
+    /**
+     * Emits the code for the pending phrase $context->w, then registers
+     * $context->wc in the dictionary and restarts the phrase with $context->c.
+     *
+     * While w is still flagged in dictionaryToCreate it is written as a
+     * literal: numBits bits holding 0 followed by the 8-bit char code
+     * (code < 256), or numBits bits holding 1 followed by the 16-bit char
+     * code. Otherwise the dictionary index of w is written in numBits bits.
+     * All values are emitted least-significant bit first.
+     *
+     * @param LZContext $context         compression state, modified in place
+     * @param int       $bitsPerChar     number of bits packed into one output char
+     * @param callable  $getCharFromInt  maps a packed integer to an output char
+     */
+    private static function produceW(LZContext $context, $bitsPerChar, $getCharFromInt)
+    {
+        if($context->dictionaryToCreateContains($context->w)) {
+            if(LZUtil::charCodeAt($context->w)<256) {
+                // marker 0: numBits zero bits, then an 8-bit literal
+                for ($i=0; $i<$context->numBits; $i++) {
+                    self::writeBit(null, $context->data, $bitsPerChar, $getCharFromInt);
+                }
+                $value = LZUtil::charCodeAt($context->w);
+                for ($i=0; $i<8; $i++) {
+                    self::writeBit($value&1, $context->data, $bitsPerChar, $getCharFromInt);
+                    $value = $value >> 1;
+                }
+            } else {
+                // marker 1: a single 1 bit padded with zeros, then a 16-bit literal
+                $value = 1;
+                for ($i=0; $i<$context->numBits; $i++) {
+                    self::writeBit($value, $context->data, $bitsPerChar, $getCharFromInt);
+                    $value = 0;
+                }
+                $value = LZUtil::charCodeAt($context->w);
+                for ($i=0; $i<16; $i++) {
+                    self::writeBit($value&1, $context->data, $bitsPerChar, $getCharFromInt);
+                    $value = $value >> 1;
+                }
+            }
+            $context->enlargeIn();
+            unset($context->dictionaryToCreate[$context->w]);
+        } else {
+            $value = $context->dictionary[$context->w];
+            for ($i=0; $i<$context->numBits; $i++) {
+                self::writeBit($value&1, $context->data, $bitsPerChar, $getCharFromInt);
+                $value = $value >> 1;
+            }
+        }
+        $context->enlargeIn();
+        $context->addToDictionary($context->wc);
+        $context->w = $context->c.'';
+    }
+
+    /**
+     * Shifts one bit into the accumulator $data->val and, once
+     * $bitsPerChar bits have been collected, appends the encoded char to
+     * the output and resets the accumulator.
+     *
+     * @param int|null $value           bit to write; null writes a 0 bit
+     * @param LZData   $data            output accumulator, modified in place
+     * @param int      $bitsPerChar     number of bits packed into one output char
+     * @param callable $getCharFromInt  maps a packed integer to an output char
+     */
+    private static function writeBit($value, LZData $data, $bitsPerChar, $getCharFromInt)
+    {
+        if(null !== $value) {
+            $data->val = ($data->val << 1) | $value;
+        } else {
+            $data->val = ($data->val << 1);
+        }
+        if ($data->position == ($bitsPerChar-1)) {
+            $data->position = 0;
+            $data->append($getCharFromInt($data->val));
+            $data->val = 0;
+        } else {
+            $data->position++;
+        }
+    }
+
+    /**
+     * Reads $exponent bits from the input, least-significant bit first,
+     * and returns them as an integer.
+     *
+     * $data->position is used as a bit mask over $data->val; when the mask
+     * is exhausted it is reset to $resetValue and the next value is
+     * fetched through $getNextValue at $data->index, which is then advanced.
+     *
+     * @param LZData   $data          read cursor, modified in place
+     * @param int      $resetValue    mask value to restart with for every fetched value
+     * @param callable $getNextValue  called as $getNextValue($feed, $index)
+     * @param string   $feed          the compressed input
+     * @param int      $exponent      number of bits to read
+     * @return int
+     */
+    private static function readBits(LZData $data, $resetValue, $getNextValue, $feed, $exponent)
+    {
+        $bits = 0;
+        $maxPower = pow(2, $exponent);
+        $power=1;
+        while($power != $maxPower) {
+            $resb = $data->val & $data->position;
+            $data->position >>= 1;
+            if ($data->position == 0) {
+                $data->position = $resetValue;
+                $data->val = $getNextValue($feed, $data->index++);
+            }
+            $bits |= (($resb>0 ? 1 : 0) * $power);
+            $power <<= 1;
+        }
+        return $bits;
+    }
+
+    /**
+     * Decodes a compressed string.
+     *
+     * Inside the stream, code 0 introduces an 8-bit literal, code 1 a
+     * 16-bit literal and code 2 terminates the stream; every other code
+     * is looked up in the reverse dictionary.
+     *
+     * @param string   $compressed
+     * @param int      $resetValue    initial bit mask for every fetched value
+     * @param callable $getNextValue  called as $getNextValue($compressed, $index)
+     * @return string|null the decoded text; '' for empty, non-string or
+     *                     truncated input; null when a code refers to an
+     *                     unknown dictionary entry
+     */
+    private static function _decompress($compressed, $resetValue, $getNextValue)
+    {
+        if(!is_string($compressed) || strlen($compressed) === 0) {
+            return '';
+        }
+
+        $length = LZUtil::utf8_strlen($compressed);
+        $enlargeIn = 4;
+        $numBits = 3;
+        $result = '';
+
+        $dictionary = new LZReverseDictionary();
+
+        $data = new LZData();
+        $data->str = $compressed;
+        $data->val = $getNextValue($compressed, 0);
+        $data->position = $resetValue;
+        $data->index = 1;
+
+        $next = self::readBits($data, $resetValue, $getNextValue, $compressed, 2);
+
+        if($next < 0 || $next > 1) {
+            return '';
+        }
+
+        $exponent = ($next == 0) ? 8 : 16;
+        $bits = self::readBits($data, $resetValue, $getNextValue, $compressed, $exponent);
+
+        $c = LZUtil::fromCharCode($bits);
+        $dictionary->addEntry($c);
+        $w = $c;
+
+        $result .= $c;
+
+        while(true) {
+            if($data->index > $length) {
+                return '';
+            }
+            $bits = self::readBits($data, $resetValue, $getNextValue, $compressed, $numBits);
+
+            $c = $bits;
+
+            switch($c) {
+                case 0:
+                    $bits = self::readBits($data, $resetValue, $getNextValue, $compressed, 8);
+                    $c = $dictionary->size();
+                    $dictionary->addEntry(LZUtil::fromCharCode($bits));
+                    $enlargeIn--;
+                    break;
+                case 1:
+                    $bits = self::readBits($data, $resetValue, $getNextValue, $compressed, 16);
+                    $c = $dictionary->size();
+                    $dictionary->addEntry(LZUtil::fromCharCode($bits));
+                    $enlargeIn--;
+                    break;
+                case 2:
+                    return $result;
+            }
+
+            if($enlargeIn == 0) {
+                $enlargeIn = pow(2, $numBits);
+                $numBits++;
+            }
+
+            // Take the first *character*, not the first byte: entries are
+            // UTF-8 strings, and the former curly-brace offset ($w{0}) both
+            // split multi-byte characters and no longer parses on PHP 8.
+            if($dictionary->hasEntry($c)) {
+                $entry = $dictionary->getEntry($c);
+            }
+            else {
+                if ($c == $dictionary->size()) {
+                    $entry = $w . LZUtil::utf8_charAt($w, 0);
+                } else {
+                    return null;
+                }
+            }
+
+            $result .= $entry;
+            $dictionary->addEntry($w . LZUtil::utf8_charAt($entry, 0));
+            $w = $entry;
+
+            $enlargeIn--;
+            if($enlargeIn == 0) {
+                $enlargeIn = pow(2, $numBits);
+                $numBits++;
+            }
+        }
+    }
+}
diff --git a/LZCompressor/LZUtil.php b/LZCompressor/LZUtil.php
new file mode 100644
index 0000000..4b27a1e
--- /dev/null
+++ b/LZCompressor/LZUtil.php
@@ -0,0 +1,112 @@
+ 1) return ($h & 0x1F) << 6 | (ord($ch[1]) & 0x3F);
+        if ($h <= 0xEF && $len > 2) return ($h & 0x0F) << 12 | (ord($ch[1]) & 0x3F) << 6 | (ord($ch[2]) & 0x3F);
+        if ($h <= 0xF4 && $len > 3)
+            return ($h & 0x0F) << 18 | (ord($ch[1]) & 0x3F) << 12 | (ord($ch[2]) & 0x3F) << 6 | (ord($ch[3]) & 0x3F);
+        return -2;
+    }
+
+    /**
+     * Returns the single UTF-8 character at character offset $num of $str
+     * (an empty string when the offset is past the end).
+     *
+     * @param string $str
+     * @param int    $num  zero-based character (not byte) offset
+     *
+     * @return string
+     */
+    public static function utf8_charAt($str, $num)
+    {
+        return mb_substr($str, $num, 1, 'UTF-8');
+    }
+
+    /**
+     * Returns the length of $str counted in UTF-8 characters.
+     *
+     * @param string $str
+     * @return int
+     */
+    public static function utf8_strlen($str) {
+        return mb_strlen($str, 'UTF-8');
+    }
+
+
+}
\ No newline at end of file