8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-27 18:03:04 +01:00
firebird-mirror/src/jrd/sqz.cpp
2014-07-17 18:48:46 +00:00

462 lines
9.9 KiB
C++

/*
* PROGRAM: JRD Access Method
* MODULE: sqz.cpp
* DESCRIPTION: Record compression/decompression
*
* The contents of this file are subject to the Interbase Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy
* of the License at http://www.Inprise.com/IPL.html
*
* Software distributed under the License is distributed on an
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
* or implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code was created by Inprise Corporation
* and its predecessors. Portions created by Inprise Corporation are
* Copyright (C) Inprise Corporation.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#include "firebird.h"
#include <string.h>
#include "../jrd/sqz.h"
#include "../jrd/req.h"
#include "../jrd/err_proto.h"
#include "../yvalve/gds_proto.h"
using namespace Jrd;
Compressor::Compressor(MemoryPool& pool, FB_SIZE_T length, const UCHAR* data)
	: m_control(pool), m_length(0)
{
	// Analyse the record once and remember the result as a "control string":
	// one byte per run, positive for a run of literal bytes (1..127) and
	// negative for a run of one repeated byte (3..128). m_length accumulates
	// the size the packed record will have.
	UCHAR* out = m_control.getBuffer((length + 1) / 2, false);
	const UCHAR* const end = data + length;

	while (data != end)
	{
		// The literal run extends up to the first place where three
		// identical bytes start, or to the end of the data if there is none.
		const UCHAR* literalEnd = end;

		for (const UCHAR* scan = data; end - scan >= 3; ++scan)
		{
			if (scan[0] == scan[1] && scan[0] == scan[2])
			{
				literalEnd = scan;
				break;
			}
		}

		FB_SIZE_T literal = literalEnd - data;
		data = literalEnd;

		// A single control byte describes at most 127 literal bytes,
		// so longer literal runs are split into several chunks.
		while (literal)
		{
			const FB_SIZE_T chunk = MIN(literal, 127U);
			m_length += 1 + chunk;
			literal -= chunk;
			*out++ = (UCHAR) chunk;
		}

		// Measure the repeated-byte run, looking at no more than 128 bytes.
		const FB_SIZE_T window = MIN(128, end - data);

		if (window >= 3)
		{
			const UCHAR* const runStart = data;
			const UCHAR* const runLimit = data + window;
			const UCHAR fill = *data;

			while (data < runLimit && *data == fill)
			{
				++data;
			}

			// Stored as a negative count; packed form is control byte + fill byte.
			*out++ = (UCHAR) (runStart - data);
			m_length += 2;
		}
	}

	// Trim the control array to the number of bytes actually written
	m_control.shrink(out - m_control.begin());
}
FB_SIZE_T Compressor::applyDiff(FB_SIZE_T diffLength,
								const UCHAR* differences,
								FB_SIZE_T outLength,
								UCHAR* const output)
{
/**************************************
 *
 *	Patch a record in place using a differences (delta) string.
 *
 *	Each control byte is either positive (that many replacement
 *	bytes follow and are copied into the record) or non-positive
 *	(that many record bytes are left untouched and skipped).
 *
 *	Returns the resulting record length. Raises BUGCHECK 176 for
 *	a malformed differences string and BUGCHECK 177 when the
 *	result does not fit into outLength bytes.
 *
 **************************************/
	if (diffLength > MAX_DIFFERENCES)
	{
		BUGCHECK(176);	// msg 176 bad difference record
	}

	const UCHAR* const diffEnd = differences + diffLength;
	const UCHAR* const outEnd = output + outLength;
	UCHAR* dst = output;

	while (differences < diffEnd && dst < outEnd)
	{
		const int run = (signed char) *differences++;

		if (run <= 0)
		{
			// Unchanged stretch: just step over it
			dst += -run;
			continue;
		}

		if (dst + run > outEnd)
		{
			BUGCHECK(177);	// msg 177 applied differences will not fit in record
		}

		if (differences + run > diffEnd)
		{
			BUGCHECK(176);	// msg 176 bad difference record
		}

		memcpy(dst, differences, run);
		dst += run;
		differences += run;
	}

	const FB_SIZE_T produced = dst - output;

	// Either a skip ran past the record, or the record filled up
	// while unprocessed differences remain
	if (produced > outLength || differences < diffEnd)
	{
		BUGCHECK(177);	// msg 177 applied differences will not fit in record
	}

	return produced;
}
FB_SIZE_T Compressor::pack(const UCHAR* input, FB_SIZE_T outLength, UCHAR* output) const
{
/**************************************
 *
 *	Compress as much of the input as fits into an output area
 *	of outLength bytes, following the precomputed control string.
 *
 *	Returns the number of input bytes consumed once the output
 *	space is exhausted. If the control string ends before that
 *	happens, BUGCHECK 178 is raised.
 *
 **************************************/
	const UCHAR* const inputStart = input;
	const UCHAR* const controlEnd = m_control.end();
	int remaining = (int) outLength;

	for (const UCHAR* ctl = m_control.begin(); ctl < controlEnd; ++ctl)
	{
		// Account for the control byte about to be written
		--remaining;

		if (remaining <= 0)
		{
			// Exactly one byte was left: fill it with a zero control byte
			if (remaining == 0)
			{
				*output = 0;
			}

			return input - inputStart;
		}

		int runLength = (signed char) *ctl;
		*output++ = (UCHAR) runLength;

		if (runLength < 0)
		{
			// Repeated-byte run: emit the byte once, skip the whole run in the input
			--remaining;
			*output++ = *input;
			input += (-runLength) & 255;
			continue;
		}

		remaining -= runLength;

		if (remaining < 0)
		{
			// Literal run does not fit: shorten it to the space left
			// and patch the control byte already written
			runLength += remaining;
			output[-1] = (UCHAR) runLength;

			if (runLength > 0)
			{
				memcpy(output, input, runLength);
				input += runLength;
			}

			return input - inputStart;
		}

		if (runLength > 0)
		{
			memcpy(output, input, runLength);
			output += runLength;
			input += runLength;
		}
	}

	BUGCHECK(178);	// msg 178 record length inconsistent
	return 0;	// shut up compiler warning
}
FB_SIZE_T Compressor::getPartialLength(FB_SIZE_T inLength, const UCHAR* input) const
{
/**************************************
 *
 *	Dry run of the space-limited pack(): walks the control
 *	string with a budget of inLength output bytes and writes
 *	nothing.
 *
 *	Returns the number of input bytes that would be consumed
 *	when the budget runs out. If the control string ends
 *	first, BUGCHECK 178 is raised.
 *
 **************************************/
	const UCHAR* const inputStart = input;
	const UCHAR* const controlEnd = m_control.end();
	int remaining = (int) inLength;

	for (const UCHAR* ctl = m_control.begin(); ctl < controlEnd; ++ctl)
	{
		// One byte of the budget goes to the control byte itself
		if (--remaining <= 0)
		{
			return input - inputStart;
		}

		const int runLength = (signed char) *ctl;

		if (runLength < 0)
		{
			// Repeated-byte run costs one more byte and covers -runLength input bytes
			--remaining;
			input += (-runLength) & 255;
			continue;
		}

		remaining -= runLength;

		if (remaining < 0)
		{
			// Only part of this literal run fits; remaining is negative here
			input += runLength + remaining;
			return input - inputStart;
		}

		input += runLength;
	}

	BUGCHECK(178);	// msg 178 record length inconsistent
	return 0;	// shut up compiler warning
}
UCHAR* Compressor::unpack(FB_SIZE_T inLength,
						  const UCHAR* input,
						  FB_SIZE_T outLength,
						  UCHAR* output)
{
/**************************************
 *
 *	Decompress a compressed string into a buffer.
 *
 *	The input is a sequence of runs. A negative control byte -n
 *	is followed by one byte that is replicated n times; a
 *	non-negative control byte n is followed by n literal bytes.
 *
 *	inLength / input	compressed data and its size in bytes
 *	outLength / output	destination buffer and its capacity
 *
 *	Return the address where the output stopped.
 *	BUGCHECK 179 is raised if a run would write past the output
 *	buffer or would read past the end of the input.
 *
 **************************************/
	const UCHAR* const end = input + inLength;
	const UCHAR* const output_end = output + outLength;

	while (input < end)
	{
		const int len = (signed char) *input++;

		if (len < 0)
		{
			const int count = -len;

			// The fill byte must be present and the run must fit. Lengths are
			// compared instead of pointers so that no out-of-range pointer is formed.
			if (input >= end || count > output_end - output)
			{
				BUGCHECK(179);	// msg 179 decompression overran buffer
			}

			const UCHAR c = *input++;
			memset(output, c, count);
			output += count;
		}
		else
		{
			// A literal run must fit into the output AND be completely present
			// in the input, otherwise memcpy would read past the input buffer.
			if (len > output_end - output || len > end - input)
			{
				BUGCHECK(179);	// msg 179 decompression overran buffer
			}

			memcpy(output, input, len);
			output += len;
			input += len;
		}
	}

	// Defensive: the per-run checks above should already guarantee this
	if (output > output_end)
	{
		BUGCHECK(179);	// msg 179 decompression overran buffer
	}

	return output;
}
FB_SIZE_T Compressor::makeNoDiff(FB_SIZE_T outLength, UCHAR* output)
{
/**************************************
 *
 *	Build a differences string stating that nothing changed:
 *	a chain of negative control bytes, each covering at most
 *	127 bytes, that together cover outLength bytes.
 *
 *	Returns the number of bytes written to output.
 *
 **************************************/
	UCHAR* cursor = output;
	int remaining = (int) outLength;

	// Full-sized chunks first
	for (; remaining > 127; remaining -= 127)
	{
		*cursor++ = -127;
	}

	// Then whatever is left over, if anything
	if (remaining != 0)
	{
		*cursor++ = (UCHAR) -remaining;
	}

	return cursor - output;
}
FB_SIZE_T Compressor::makeDiff(FB_SIZE_T length1,
const UCHAR* rec1,
FB_SIZE_T length2,
UCHAR* rec2,
FB_SIZE_T outLength,
UCHAR* output)
{
/**************************************
*
* Compute differences between two records. The difference
* record, when applied to the first record, produces the
* second record.
*
* difference_record := <control_string>...
*
* control_string := <positive_integer> <positive_integer data bytes>
* := <negative_integer>
*
* A positive control byte N is followed by N bytes taken from
* the second record. A negative control byte -N states that
* the next N bytes are the same in both records.
*
* length1 / rec1 first record and its length
* length2 / rec2 second record and its length
* outLength / output capacity of and pointer to the output buffer
*
* Return the total length of the differences string, or
* MAX_ULONG if the output buffer is too small to hold it.
*
**************************************/
// Address of the control byte of the literal run being built,
// or the start of a matching stretch in rec2.
UCHAR *p;
// Append one byte to the output. If the output buffer is full, this
// returns MAX_ULONG from makeDiff() itself.
#define STUFF(val) if (output < end) *output++ = val; else return MAX_ULONG;
/* WHY IS THIS RETURNING MAX_ULONG ???
* It returns a large positive value to indicate to the caller that we ran out
* of buffer space in the 'output' argument. Thus we could not create a
* successful differences record. It is up to the caller to check the
* return value of this function and figure out whether the differences record
* was created or not. Check prepare_update() (JRD/vio.c) for further
* information. Of course, the size for a 'differences' record is not expected
* to go near 2^32 in the future.
*
* This was investigated as a part of solving bug 10206, bsriram - 25-Feb-1999.
*/
const UCHAR* const start = output;
const UCHAR* const end = output + outLength;
// Only the part present in both records is compared byte by byte
const UCHAR* const end1 = rec1 + MIN(length1, length2);
const UCHAR* const end2 = rec2 + length2;
// Compare the records while more than two bytes of the common part remain
while (end1 - rec1 > 2)
{
if (rec1[0] != rec2[0] || rec1[1] != rec2[1])
{
// Start a literal run: reserve its control byte, to be filled in below
p = output++;
// "yellow" is the last position this run may cover, which limits it to
// 127 bytes and to the common part. It is computed with integer (U_IPTR)
// arithmetic so that rec1 + 127 cannot wrap around when rec1 is near
// the end of a segment (historically an OS/2 concern).
const UCHAR* yellow = (UCHAR*) MIN((U_IPTR) end1, ((U_IPTR) rec1 + 127)) - 1;
// Keep copying while the current byte differs, or while it matches but
// the following byte differs again (a lone matching byte stays in the run)
while (rec1 <= yellow && (rec1[0] != rec2[0] || (rec1 < yellow && rec1[1] != rec2[1])))
{
STUFF(*rec2++);
++rec1;
}
// Store the number of literal bytes just written
*p = output - p - 1;
continue;
}
// Skip over the stretch where both records are identical
for (p = rec2; rec1 < end1 && *rec1 == *rec2; rec1++, rec2++)
; // no-op
// "l" is the negated count of matching bytes. Its magnitude could be more
// than 32K since the old and new records could be the same for more than
// 32K characters (MAX record size is currently 64K), hence it is an "int".
int l = p - rec2;
// One control byte covers at most 127 matching bytes
while (l < -127)
{
STUFF(-127);
l += 127;
}
if (l)
{
STUFF(l);
}
}
// Whatever is left of the second record is emitted as literal runs
// of at most 127 bytes each
while (rec2 < end2)
{
// Reserve the control byte of this run, to be filled in below
p = output++;
// End of this run, computed with integer (U_IPTR) arithmetic so that
// rec2 + 127 cannot wrap around near the end of a segment
// (historically an OS/2 concern)
const UCHAR* yellow = (UCHAR*) MIN((U_IPTR) end2, ((U_IPTR) rec2 + 127));
while (rec2 < yellow)
{
STUFF(*rec2++);
}
// Store the number of literal bytes just written
*p = output - p - 1;
}
return output - start;
#undef STUFF
}
void Compressor::pack(const UCHAR* input, UCHAR* output) const
{
/**************************************
*
* Compress a string into a sufficiently large area.
* Don't check nuttin' -- go for speed, man, raw SPEED!
*
**************************************/
const UCHAR* control = m_control.begin();
const UCHAR* const dcc_end = m_control.end();
while (control < dcc_end)
{
const int length = (signed char) *control++;
*output++ = (UCHAR) length;
if (length < 0)
{
*output++ = *input;
input -= length;
}
else if (length > 0)
{
memcpy(output, input, length);
output += length;
input += length;
}
}
}