8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-31 08:03:04 +01:00
firebird-mirror/src/jrd/sqz.cpp

458 lines
9.7 KiB
C++
Raw Normal View History

2001-05-23 15:26:42 +02:00
/*
 *	PROGRAM:	JRD Access Method
 *	MODULE:		sqz.cpp
 *	DESCRIPTION:	Record compression/decompression
 *
* The contents of this file are subject to the Interbase Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy
* of the License at http://www.Inprise.com/IPL.html
*
* Software distributed under the License is distributed on an
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
* or implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code was created by Inprise Corporation
* and its predecessors. Portions created by Inprise Corporation are
* Copyright (C) Inprise Corporation.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#include "firebird.h"
2001-05-23 15:26:42 +02:00
#include <string.h>
2004-03-22 12:38:23 +01:00
#include "../jrd/common.h"
2001-05-23 15:26:42 +02:00
#include "../jrd/sqz.h"
#include "../jrd/req.h"
#include "../jrd/err_proto.h"
#include "../jrd/gds_proto.h"
using namespace Jrd;
2001-05-23 15:26:42 +02:00
Compressor::Compressor(size_t length, const UCHAR* data)
	: m_control(getPool()), m_length(0)
{
/**************************************
 *
 *	Scan "length" bytes at "data" and build the run-length
 *	control string in m_control.  Each control byte is either
 *
 *	   n > 0 : the next n (1..127) input bytes are stored verbatim
 *	   n < 0 : one input byte is repeated -n (3..128) times
 *
 *	m_length accumulates the packed size implied by the control
 *	string: 1 + n for every verbatim entry, 2 for every repeat entry.
 *
 **************************************/

	// Room for the control string is reserved up front and trimmed to the
	// number of entries actually produced once the scan is complete.
	UCHAR* control = m_control.getBuffer((length + 1) / 2, false);
	const UCHAR* const end = data + length;

	while (data < end)
	{
		// Verbatim part: unless a triple of equal bytes is found,
		// everything that is left is stored as-is.
		const UCHAR* const literalStart = data;
		size_t literalCount = end - data;

		// "lookahead" stays above 1 only while data[2] is still inside the input
		for (size_t lookahead = literalCount - 1; lookahead > 1; --lookahead, ++data)
		{
			if (data[0] == data[1] && data[0] == data[2])
			{
				literalCount = data - literalStart;
				break;
			}
		}

		data = literalStart + literalCount;

		// A single control byte describes at most 127 verbatim bytes
		while (literalCount)
		{
			const size_t chunk = MIN(literalCount, 127);
			*control++ = (UCHAR) chunk;
			m_length += 1 + chunk;
			literalCount -= chunk;
		}

		// Repeat part: at least 3 and at most 128 copies of one byte
		size_t repeatLimit = MIN(128, end - data);

		if (repeatLimit >= 3)
		{
			const UCHAR* const repeatStart = data;
			const UCHAR fill = *data;

			for (; repeatLimit && *data == fill; --repeatLimit)
			{
				++data;
			}

			// stored as a negative count
			*control++ = (UCHAR) (repeatStart - data);
			m_length += 2;
		}
	}

	// set array size to the really used length
	m_control.shrink(control - m_control.begin());
}
size_t Compressor::applyDiff(size_t diffLength,
const UCHAR* differences,
size_t outLength,
UCHAR* output)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* Apply a differences (delta) to a record.
* Return the length.
2001-05-23 15:26:42 +02:00
*
**************************************/
if (diffLength > MAX_DIFFERENCES)
2001-05-23 15:26:42 +02:00
{
2009-11-23 10:13:38 +01:00
BUGCHECK(176); // msg 176 bad difference record
2001-05-23 15:26:42 +02:00
}
const UCHAR* const end = differences + diffLength;
UCHAR* p = output;
const UCHAR* const p_end = output + outLength;
2001-05-23 15:26:42 +02:00
while (differences < end && p < p_end)
{
const int l = (signed char) *differences++;
2001-05-23 15:26:42 +02:00
if (l > 0)
{
if (p + l > p_end)
{
2009-11-23 10:13:38 +01:00
BUGCHECK(177); // msg 177 applied differences will not fit in record
2001-05-23 15:26:42 +02:00
}
memcpy(p, differences, l);
p += l;
differences += l;
}
else
{
p += -l;
}
}
const size_t length = p - output;
2001-05-23 15:26:42 +02:00
if (length > outLength || differences < end)
2001-05-23 15:26:42 +02:00
{
2009-11-23 10:13:38 +01:00
BUGCHECK(177); // msg 177 applied differences will not fit in record
2001-05-23 15:26:42 +02:00
}
return length;
}
size_t Compressor::pack(const UCHAR* input, size_t outLength, UCHAR* output) const
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* Compress a string into an area of known length.
* If it doesn't fit, throw BUGCHECK error.
2001-05-23 15:26:42 +02:00
*
**************************************/
const UCHAR* const start = input;
2001-05-23 15:26:42 +02:00
const UCHAR* control = m_control.begin();
const UCHAR* const dcc_end = m_control.end();
2001-05-23 15:26:42 +02:00
int space = (int) outLength;
2009-06-20 20:46:59 +02:00
while (control < dcc_end)
2009-06-10 15:31:34 +02:00
{
if (--space <= 0)
2001-05-23 15:26:42 +02:00
{
2009-06-10 15:31:34 +02:00
if (space == 0)
{
2009-06-10 15:31:34 +02:00
*output = 0;
}
2009-06-10 15:31:34 +02:00
return input - start;
}
int length = (signed char) *control++;
*output++ = (UCHAR) length;
if (length < 0)
2009-06-10 15:31:34 +02:00
{
--space;
*output++ = *input;
input += (-length) & 255;
}
else
{
if ((space -= length) < 0)
2001-05-23 15:26:42 +02:00
{
2009-06-10 15:31:34 +02:00
length += space;
output[-1] = (UCHAR) length;
2009-06-10 15:31:34 +02:00
if (length > 0)
2001-05-23 15:26:42 +02:00
{
2008-02-03 11:41:44 +01:00
memcpy(output, input, length);
2001-05-23 15:26:42 +02:00
input += length;
}
2009-06-10 15:31:34 +02:00
return input - start;
}
if (length > 0)
{
2009-06-10 15:31:34 +02:00
memcpy(output, input, length);
output += length;
input += length;
2001-05-23 15:26:42 +02:00
}
}
2009-06-10 15:31:34 +02:00
}
2009-06-20 20:46:59 +02:00
BUGCHECK(178); // msg 178 record length inconsistent
return 0; // shut up compiler warning
2001-05-23 15:26:42 +02:00
}
size_t Compressor::getPartialLength(size_t inLength, const UCHAR* input) const
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* Same as pack() without the output.
* If it doesn't fit, return the number of bytes that did.
2001-05-23 15:26:42 +02:00
*
**************************************/
const UCHAR* const start = input;
2001-05-23 15:26:42 +02:00
const UCHAR* control = m_control.begin();
const UCHAR* const dcc_end = m_control.end();
2001-05-23 15:26:42 +02:00
int space = (int) inLength;
2009-06-20 20:46:59 +02:00
while (control < dcc_end)
2009-06-10 15:31:34 +02:00
{
if (--space <= 0)
{
2009-06-10 15:31:34 +02:00
return input - start;
}
int length = (signed char) *control++;
if (length < 0)
{
2009-06-10 15:31:34 +02:00
--space;
input += (-length) & 255;
}
else
{
if ((space -= length) < 0)
{
2009-06-10 15:31:34 +02:00
length += space;
2001-05-23 15:26:42 +02:00
input += length;
2009-06-10 15:31:34 +02:00
return input - start;
2001-05-23 15:26:42 +02:00
}
2009-06-10 15:31:34 +02:00
input += length;
2008-01-16 10:48:41 +01:00
}
2009-06-10 15:31:34 +02:00
}
2009-06-20 20:46:59 +02:00
BUGCHECK(178); // msg 178 record length inconsistent
return 0; // shut up compiler warning
2001-05-23 15:26:42 +02:00
}
UCHAR* Compressor::unpack(size_t inLength,
						  const UCHAR* input,
						  size_t outLength,
						  UCHAR* output)
{
/**************************************
 *
 *	Decompress inLength bytes of run-length encoded data from
 *	"input" into "output", which has room for outLength bytes.
 *
 *	Each control byte is either
 *	   n < 0 : the following byte is repeated -n times
 *	   n >= 0: the following n bytes are copied verbatim
 *
 *	Returns the address where the output stopped.
 *
 *	Raises BUGCHECK(179) when the output would overrun its
 *	buffer, or when the compressed data ends in the middle of
 *	an entry (missing fill byte or truncated verbatim run).
 *
 **************************************/

	const UCHAR* const end = input + inLength;
	const UCHAR* const output_end = output + outLength;

	while (input < end)
	{
		const int len = (signed char) *input++;

		if (len < 0)
		{
			const int repeat = -len;

			// The fill byte must exist in the input, and the expanded run
			// must fit into what is left of the output.
			if (input >= end || repeat > output_end - output)
			{
				BUGCHECK(179);	// msg 179 decompression overran buffer
			}

			const UCHAR c = *input++;
			memset(output, c, repeat);
			output += repeat;
		}
		else
		{
			// The verbatim bytes must be present in the input and fit into
			// the output; compare lengths rather than forming pointers
			// that may lie outside either buffer.
			if (len > end - input || len > output_end - output)
			{
				BUGCHECK(179);	// msg 179 decompression overran buffer
			}

			memcpy(output, input, len);
			output += len;
			input += len;
		}
	}

	if (output > output_end)
	{
		BUGCHECK(179);	// msg 179 decompression overran buffer
	}

	return output;
}
size_t Compressor::makeNoDiff(size_t outLength, UCHAR* output)
{
/**************************************
*
* Generates differences record marking that there are no differences.
*
**************************************/
UCHAR* temp = output;
int length = (int) outLength;
while (length > 127)
{
*temp++ = -127;
length -= 127;
}
if (length)
{
*temp++ = (UCHAR) -length;
2003-12-31 06:36:12 +01:00
}
return temp - output;
}
size_t Compressor::makeDiff(size_t length1,
const UCHAR* rec1,
size_t length2,
UCHAR* rec2,
size_t outLength,
UCHAR* output)
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* Compute differences between two records. The difference
2001-05-23 15:26:42 +02:00
* record, when applied to the first record, produces the
2008-12-05 02:20:14 +01:00
* second record.
2001-05-23 15:26:42 +02:00
*
* difference_record := <control_string>...
2001-05-23 15:26:42 +02:00
*
* control_string := <positive_integer> <positive_integer data bytes>
2001-05-23 15:26:42 +02:00
* := <negative_integer>
*
2008-12-05 02:20:14 +01:00
* Return the total length of the differences string.
2001-05-23 15:26:42 +02:00
*
**************************************/
UCHAR *p;
#define STUFF(val) if (output < end) *output++ = val; else return MAX_ULONG;
/* WHY IS THIS RETURNING MAX_ULONG ???
* It returns a large positive value to indicate to the caller that we ran out
2009-11-23 10:13:38 +01:00
* of buffer space in the 'out' argument. Thus we could not create a
* successful differences record. Now it is upto the caller to check the
* return value of this function and figure out whether the differences record
* was created or not. Check prepare_update() (JRD/vio.c) for further
* information. Of course, the size for a 'differences' record is not expected
* to go near 2^32 in the future.
2009-11-23 10:13:38 +01:00
*
* This was investigated as a part of solving bug 10206, bsriram - 25-Feb-1999.
*/
2001-05-23 15:26:42 +02:00
const UCHAR* const start = output;
const UCHAR* const end = output + outLength;
const UCHAR* const end1 = rec1 + MIN(length1, length2);
const UCHAR* const end2 = rec2 + length2;
2001-05-23 15:26:42 +02:00
while (end1 - rec1 > 2)
{
if (rec1[0] != rec2[0] || rec1[1] != rec2[1])
{
p = output++;
2001-05-23 15:26:42 +02:00
2009-11-23 10:13:38 +01:00
// cast this to LONG to take care of OS/2 pointer arithmetic
// when rec1 is at the end of a segment, to avoid wrapping around
2001-05-23 15:26:42 +02:00
const UCHAR* yellow = (UCHAR*) MIN((U_IPTR) end1, ((U_IPTR) rec1 + 127)) - 1;
2008-12-25 07:09:37 +01:00
while (rec1 <= yellow && (rec1[0] != rec2[0] || (rec1[1] != rec2[1] && rec1 < yellow)))
2003-12-31 06:36:12 +01:00
{
2001-05-23 15:26:42 +02:00
STUFF(*rec2++);
++rec1;
}
*p = output - p - 1;
2001-05-23 15:26:42 +02:00
continue;
}
2001-05-23 15:26:42 +02:00
for (p = rec2; rec1 < end1 && *rec1 == *rec2; rec1++, rec2++)
; // no-op
// This "l" could be more than 32K since the Old and New records
// could be the same for more than 32K characters.
// MAX record size is currently 64K. Hence it is defined as "int".
int l = p - rec2;
2001-05-23 15:26:42 +02:00
while (l < -127)
{
STUFF(-127);
l += 127;
}
2001-05-23 15:26:42 +02:00
if (l)
{
STUFF(l);
}
}
while (rec2 < end2)
{
p = output++;
2001-05-23 15:26:42 +02:00
2009-11-23 10:13:38 +01:00
// cast this to LONG to take care of OS/2 pointer arithmetic
// when rec1 is at the end of a segment, to avoid wrapping around
2001-05-23 15:26:42 +02:00
const UCHAR* yellow = (UCHAR*) MIN((U_IPTR) end2, ((U_IPTR) rec2 + 127));
2001-05-23 15:26:42 +02:00
while (rec2 < yellow)
{
STUFF(*rec2++);
}
*p = output - p - 1;
2001-05-23 15:26:42 +02:00
}
return output - start;
2001-05-23 15:26:42 +02:00
}
void Compressor::pack(const UCHAR* input, UCHAR* output) const
2001-05-23 15:26:42 +02:00
{
/**************************************
*
* Compress a string into a sufficiently large area.
* Don't check nuttin' -- go for speed, man, raw SPEED!
2001-05-23 15:26:42 +02:00
*
**************************************/
const UCHAR* control = m_control.begin();
const UCHAR* const dcc_end = m_control.end();
2009-06-20 20:46:59 +02:00
while (control < dcc_end)
2009-06-10 15:31:34 +02:00
{
const int length = (signed char) *control++;
*output++ = (UCHAR) length;
2009-06-10 15:31:34 +02:00
if (length < 0)
2001-05-23 15:26:42 +02:00
{
2009-06-10 15:31:34 +02:00
*output++ = *input;
input -= length;
2001-05-23 15:26:42 +02:00
}
2009-06-10 15:31:34 +02:00
else if (length > 0)
{
memcpy(output, input, length);
output += length;
input += length;
}
}
2001-05-23 15:26:42 +02:00
}