8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-23 19:23:03 +01:00

Improvement CORE-1277 - Automatic transliteration of text blobs

This commit is contained in:
asfernandes 2007-05-22 02:14:20 +00:00
parent 7f83fcf614
commit c67afcb4ee
10 changed files with 199 additions and 122 deletions

View File

@ -1614,12 +1614,17 @@ static void gen_descriptor( dsql_req* request, const dsc* desc, bool texttype)
stuff(request, blr_timestamp);
break;
case dtype_blob:
case dtype_array:
stuff(request, blr_quad);
stuff(request, 0);
break;
case dtype_blob:
stuff(request, blr_blob2);
stuff_word(request, desc->dsc_sub_type);
stuff_word(request, desc->getTextType());
break;
default:
// don't understand dtype
ERRD_post(isc_sqlerr, isc_arg_number, (SLONG) - 804,

View File

@ -9902,39 +9902,47 @@ static bool set_parameter_type(dsql_req* request, dsql_nod* in_node, dsql_nod* n
MAKE_desc(request, &in_node->nod_desc, node, NULL);
if (in_node->nod_desc.dsc_dtype <= dtype_any_text &&
request->req_dbb->dbb_att_charset != CS_NONE &&
if (request->req_dbb->dbb_att_charset != CS_NONE &&
request->req_dbb->dbb_att_charset != CS_BINARY)
{
int diff = 0;
switch (in_node->nod_desc.dsc_dtype)
{
case dtype_varying:
diff = sizeof(USHORT);
break;
case dtype_cstring:
diff = 1;
break;
}
in_node->nod_desc.dsc_length -= diff;
USHORT fromCharSet = INTL_GET_CHARSET(&in_node->nod_desc);
USHORT fromCharSet = in_node->nod_desc.getCharSet();
USHORT toCharSet = (fromCharSet == CS_NONE || fromCharSet == CS_BINARY) ?
fromCharSet : request->req_dbb->dbb_att_charset;
if (toCharSet != fromCharSet)
if (in_node->nod_desc.dsc_dtype <= dtype_any_text)
{
USHORT fromCharSetBPC = METD_get_charset_bpc(request, fromCharSet);
USHORT toCharSetBPC = METD_get_charset_bpc(request, toCharSet);
int diff = 0;
switch (in_node->nod_desc.dsc_dtype)
{
case dtype_varying:
diff = sizeof(USHORT);
break;
case dtype_cstring:
diff = 1;
break;
}
in_node->nod_desc.dsc_length -= diff;
INTL_ASSIGN_TTYPE(&in_node->nod_desc, INTL_CS_COLL_TO_TTYPE(toCharSet,
(fromCharSet == toCharSet ? INTL_GET_COLLATE(&in_node->nod_desc) : 0)));
if (toCharSet != fromCharSet)
{
USHORT fromCharSetBPC = METD_get_charset_bpc(request, fromCharSet);
USHORT toCharSetBPC = METD_get_charset_bpc(request, toCharSet);
in_node->nod_desc.dsc_length =
UTLD_char_length_to_byte_length(in_node->nod_desc.dsc_length / fromCharSetBPC, toCharSetBPC);
INTL_ASSIGN_TTYPE(&in_node->nod_desc, INTL_CS_COLL_TO_TTYPE(toCharSet,
(fromCharSet == toCharSet ? INTL_GET_COLLATE(&in_node->nod_desc) : 0)));
in_node->nod_desc.dsc_length =
UTLD_char_length_to_byte_length(in_node->nod_desc.dsc_length / fromCharSetBPC, toCharSetBPC);
}
in_node->nod_desc.dsc_length += diff;
}
else if (in_node->nod_desc.dsc_dtype == dtype_blob &&
in_node->nod_desc.dsc_sub_type == isc_blob_text &&
fromCharSet != CS_NONE && fromCharSet != CS_BINARY)
{
in_node->nod_desc.setTextType(toCharSet);
}
in_node->nod_desc.dsc_length += diff;
}
dsql_par* parameter = (dsql_par*) in_node->nod_arg[e_par_parameter];

View File

@ -428,39 +428,33 @@ void BLB_garbage_collect(
}
void BLB_gen_bpb_from_descs(const dsc* fromDesc, const dsc* toDesc, Firebird::UCharBuffer& bpb)
void BLB_gen_bpb(SSHORT source, SSHORT target, UCHAR sourceCharset, UCHAR targetCharset, Firebird::UCharBuffer& bpb)
{
bpb.resize(15);
UCHAR* p = bpb.begin();
*p++ = isc_bpb_version1;
SSHORT subType = fromDesc->getBlobSubType();
UCHAR charSet = fromDesc->getCharSet();
*p++ = isc_bpb_source_type;
*p++ = 2;
put_short(p, subType);
put_short(p, source);
p += 2;
if (subType == isc_blob_text)
if (source == isc_blob_text)
{
*p++ = isc_bpb_source_interp;
*p++ = 1;
*p++ = charSet;
*p++ = sourceCharset;
}
subType = toDesc->getBlobSubType();
charSet = toDesc->getCharSet();
*p++ = isc_bpb_target_type;
*p++ = 2;
put_short(p, subType);
put_short(p, target);
p += 2;
if (subType == isc_blob_text)
if (target == isc_blob_text)
{
*p++ = isc_bpb_target_interp;
*p++ = 1;
*p++ = charSet;
*p++ = targetCharset;
}
// set the array count to the number of bytes we used
@ -468,6 +462,13 @@ void BLB_gen_bpb_from_descs(const dsc* fromDesc, const dsc* toDesc, Firebird::UC
}
void BLB_gen_bpb_from_descs(const dsc* fromDesc, const dsc* toDesc, Firebird::UCharBuffer& bpb)
{
BLB_gen_bpb(fromDesc->getBlobSubType(), toDesc->getBlobSubType(),
fromDesc->getCharSet(), toDesc->getCharSet(), bpb);
}
blb* BLB_get_array(thread_db* tdbb, jrd_tra* transaction, const bid* blob_id,
Ods::InternalArrayDesc* desc)
{
@ -1153,6 +1154,8 @@ void BLB_move(thread_db* tdbb, dsc* from_desc, dsc* to_desc, jrd_nod* field)
}
blob->blb_relation = relation;
blob->blb_sub_type = to_desc->getBlobSubType();
blob->blb_charset = to_desc->getCharSet();
destination->set_permanent(relation->rel_id, DPM_store_blob(tdbb, blob, record));
// This is the only place in the engine where blobs are materialized
// If new places appear code below should transform to common sub-routine
@ -1210,7 +1213,8 @@ blb* BLB_open(thread_db* tdbb, jrd_tra* transaction, const bid* blob_id)
blb* BLB_open2(thread_db* tdbb,
jrd_tra* transaction, const bid* blob_id,
USHORT bpb_length, const UCHAR* bpb)
USHORT bpb_length, const UCHAR* bpb,
bool external_call)
{
/**************************************
*
@ -1229,12 +1233,19 @@ blb* BLB_open2(thread_db* tdbb,
/* Handle filter case */
SSHORT from, to;
SSHORT from_charset, to_charset;
bool from_type_specified;
bool from_charset_specified;
bool to_type_specified;
bool to_charset_specified;
gds__parse_bpb2(bpb_length,
bpb,
&from,
&to,
reinterpret_cast<USHORT*>(&from_charset),
reinterpret_cast<USHORT*>(&to_charset));
reinterpret_cast<USHORT*>(&to_charset),
&from_type_specified, &from_charset_specified,
&to_type_specified, &to_charset_specified);
blb* blob = allocate_blob(tdbb, transaction);
@ -1244,61 +1255,15 @@ blb* BLB_open2(thread_db* tdbb,
get_replay_blob(tdbb, blob_id);
#endif
blob->blb_target_interp = to_charset;
blob->blb_source_interp = from_charset;
BlobFilter* filter = NULL;
bool filter_required = false;
if (to && from != to) {
filter = find_filter(tdbb, from, to);
filter_required = true;
}
else if (to == isc_blob_text && (from_charset != to_charset)) {
if (from_charset == CS_dynamic)
from_charset = tdbb->tdbb_attachment->att_charset;
if (to_charset == CS_dynamic)
to_charset = tdbb->tdbb_attachment->att_charset;
if ((to_charset != CS_NONE) && (from_charset != CS_NONE) &&
(to_charset != CS_BINARY) && (from_charset != CS_BINARY) &&
(from_charset != to_charset))
{
filter = FB_NEW(*dbb->dbb_permanent) BlobFilter(*dbb->dbb_permanent);
filter->blf_filter = filter_transliterate_text;
filter_required = true;
}
}
if (filter_required) {
BlobControl* control = 0;
if (BLF_open_blob(tdbb,
transaction,
&control,
blob_id,
bpb_length,
bpb,
reinterpret_cast<FPTR_BFILTER_CALLBACK>(blob_filter),
filter))
{
ERR_punt();
}
blob->blb_filter = control;
blob->blb_max_segment = control->ctl_max_segment;
blob->blb_count = control->ctl_number_segments;
blob->blb_length = control->ctl_total_length;
return blob;
}
bool try_relations = false;
BlobIndex* current = NULL;
if (!blob_id->bid_internal.bid_relation_id)
{
if (blob_id->isEmpty())
{
blob->blb_flags |= BLB_eof;
return blob;
}
else {
else
{
/* Note: Prior to 1991, we would immediately report bad_segstr_id here,
* but then we decided to allow a newly created blob to be opened,
* leaving the possibility of receiving a garbage blob ID from
@ -1342,42 +1307,114 @@ blb* BLB_open2(thread_db* tdbb,
blob->blb_segment =
(UCHAR *) ((blob_page*) new_blob->blb_data)->blp_page;
}
return blob;
}
else
try_relations = true;
}
}
else
try_relations = true;
if (try_relations)
{
// Ordinarily, we would call MET_relation to get the relation id.
// However, since the blob id must be considered suspect, this is
// not a good idea. On the other hand, if we don't already
// know about the relation, the blob id has got to be invalid
// anyway.
vec<jrd_rel*>* vector = dbb->dbb_relations;
if (blob_id->bid_internal.bid_relation_id >= vector->count() ||
!(blob->blb_relation = (*vector)[blob_id->bid_internal.bid_relation_id] ) )
{
ERR_post(isc_bad_segstr_id, 0);
}
blob->blb_pg_space_id = blob->blb_relation->getPages(tdbb)->rel_pg_space_id;
DPM_get_blob(tdbb, blob, blob_id->get_permanent_number(), false, (SLONG) 0);
// If the blob is known to be damaged, ignore it.
if (blob->blb_flags & BLB_damaged) {
if (!(dbb->dbb_flags & DBB_damaged))
IBERROR(194); // msg 194 blob not found
blob->blb_flags |= BLB_eof;
return blob;
}
// Get first data page in anticipation of reading.
if (blob->blb_level == 0)
blob->blb_segment = blob->blb_data;
}
Firebird::UCharBuffer new_bpb;
if (external_call &&
ENCODE_ODS(dbb->dbb_ods_version, dbb->dbb_minor_original) >= ODS_11_1)
{
if (!from_type_specified)
from = blob->blb_sub_type;
if (!from_charset_specified)
from_charset = blob->blb_charset;
if (!to_type_specified && from == isc_blob_text)
to = isc_blob_text;
if (!to_charset_specified && from == isc_blob_text)
to_charset = CS_dynamic;
BLB_gen_bpb(from, to, from_charset, to_charset, new_bpb);
bpb = new_bpb.begin();
bpb_length = new_bpb.getCount();
}
blob->blb_target_interp = to_charset;
blob->blb_source_interp = from_charset;
BlobFilter* filter = NULL;
bool filter_required = false;
if (to && from != to) {
filter = find_filter(tdbb, from, to);
filter_required = true;
}
else if (to == isc_blob_text && (from_charset != to_charset)) {
if (from_charset == CS_dynamic)
from_charset = tdbb->tdbb_attachment->att_charset;
if (to_charset == CS_dynamic)
to_charset = tdbb->tdbb_attachment->att_charset;
if ((to_charset != CS_NONE) && (from_charset != CS_NONE) &&
(to_charset != CS_BINARY) && (from_charset != CS_BINARY) &&
(from_charset != to_charset))
{
filter = FB_NEW(*dbb->dbb_permanent) BlobFilter(*dbb->dbb_permanent);
filter->blf_filter = filter_transliterate_text;
filter_required = true;
}
}
/* Ordinarily, we would call MET_relation to get the relation id.
However, since the blob id must be considered suspect, this is
not a good idea. On the other hand, if we don't already
know about the relation, the blob id has got to be invalid
anyway. */
vec<jrd_rel*>* vector = dbb->dbb_relations;
if (blob_id->bid_internal.bid_relation_id >= vector->count() ||
!(blob->blb_relation = (*vector)[blob_id->bid_internal.bid_relation_id] ) )
if (filter_required)
{
ERR_post(isc_bad_segstr_id, 0);
}
blob->blb_pg_space_id = blob->blb_relation->getPages(tdbb)->rel_pg_space_id;
DPM_get_blob(tdbb, blob, blob_id->get_permanent_number(), false, (SLONG) 0);
/* If the blob is known to be damaged, ignore it. */
if (blob->blb_flags & BLB_damaged) {
if (!(dbb->dbb_flags & DBB_damaged))
IBERROR(194); /* msg 194 blob not found */
blob->blb_flags |= BLB_eof;
BlobControl* control = 0;
if (BLF_open_blob(tdbb,
transaction,
&control,
blob_id,
bpb_length,
bpb,
reinterpret_cast<FPTR_BFILTER_CALLBACK>(blob_filter),
filter))
{
ERR_punt();
}
blob->blb_filter = control;
blob->blb_max_segment = control->ctl_max_segment;
blob->blb_count = control->ctl_number_segments;
blob->blb_length = control->ctl_total_length;
return blob;
}
/* Get first data page in anticipation of reading. */
if (blob->blb_level == 0)
blob->blb_segment = blob->blb_data;
return blob;
}

View File

@ -148,6 +148,7 @@ class blb : public pool_alloc_rpt<UCHAR, type_blb>
USHORT blb_source_interp; /* source interp (for writing) */
USHORT blb_target_interp; /* destination interp (for reading) */
SSHORT blb_sub_type; /* Blob's declared sub-type */
UCHAR blb_charset; // Blob's charset
USHORT blb_pg_space_id; // page space
ULONG blb_sequence; /* Blob page sequence */
ULONG blb_max_sequence; /* Number of data pages */

View File

@ -37,6 +37,7 @@ void BLB_close(Jrd::thread_db*, Jrd::blb*);
Jrd::blb* BLB_create(Jrd::thread_db*, Jrd::jrd_tra*, Jrd::bid*);
Jrd::blb* BLB_create2(Jrd::thread_db*, Jrd::jrd_tra*, Jrd::bid*, USHORT, const UCHAR*);
void BLB_garbage_collect(Jrd::thread_db*, Jrd::RecordStack&, Jrd::RecordStack&, SLONG, Jrd::jrd_rel*);
void BLB_gen_bpb(SSHORT source, SSHORT target, UCHAR sourceCharset, UCHAR targetCharset, Firebird::UCharBuffer& bpb);
void BLB_gen_bpb_from_descs(const dsc*, const dsc*, Firebird::UCharBuffer&);
Jrd::blb* BLB_get_array(Jrd::thread_db*, Jrd::jrd_tra*, const Jrd::bid*, Ods::InternalArrayDesc*);
ULONG BLB_get_data(Jrd::thread_db*, Jrd::blb*, UCHAR*, SLONG, bool = true);
@ -47,7 +48,7 @@ SLONG BLB_lseek(Jrd::blb*, USHORT, SLONG);
void BLB_move(Jrd::thread_db*, dsc*, dsc*, Jrd::jrd_nod*);
Jrd::blb* BLB_open(Jrd::thread_db*, Jrd::jrd_tra*, const Jrd::bid*);
Jrd::blb* BLB_open2(Jrd::thread_db*, Jrd::jrd_tra*, const Jrd::bid*, USHORT, const UCHAR*);
Jrd::blb* BLB_open2(Jrd::thread_db*, Jrd::jrd_tra*, const Jrd::bid*, USHORT, const UCHAR*, bool = false);
void BLB_put_data(Jrd::thread_db*, Jrd::blb*, const UCHAR*, SLONG);
void BLB_put_segment(Jrd::thread_db*, Jrd::blb*, const UCHAR*, USHORT);
void BLB_put_slice(Jrd::thread_db*, Jrd::jrd_tra*, Jrd::bid*, const UCHAR*, USHORT,

View File

@ -1426,6 +1426,8 @@ ULONG DPM_get_blob(thread_db* tdbb,
blob->blb_max_segment = header->blh_max_segment;
blob->blb_level = header->blh_level;
blob->blb_sub_type = header->blh_sub_type;
if (ENCODE_ODS(dbb->dbb_ods_version, dbb->dbb_minor_original) >= ODS_11_1)
blob->blb_charset = header->blh_charset;
// Unless this is the only attachment, don't allow the sequential scan
// of very large blobs to flush pages used by other attachments.
@ -2046,6 +2048,8 @@ RecordNumber DPM_store_blob(thread_db* tdbb, blb* blob, Record* record)
header->blh_length = blob->blb_length;
header->blh_level = blob->blb_level;
header->blh_sub_type = blob->blb_sub_type;
if (ENCODE_ODS(dbb->dbb_ods_version, dbb->dbb_minor_original) >= ODS_11_1)
header->blh_charset = blob->blb_charset;
UCHAR* p = (UCHAR *) header->blh_page;
if (length) {

View File

@ -2035,7 +2035,11 @@ USHORT API_ROUTINE gds__parse_bpb2(USHORT bpb_length,
SSHORT* source,
SSHORT* target,
USHORT* source_interp,
USHORT* target_interp)
USHORT* target_interp,
bool* source_type_specified,
bool* source_interp_specified,
bool* target_type_specified,
bool* target_interp_specified)
{
/**************************************
*
@ -2057,6 +2061,14 @@ USHORT API_ROUTINE gds__parse_bpb2(USHORT bpb_length,
*source_interp = 0;
if (target_interp)
*target_interp = 0;
if (source_type_specified)
*source_type_specified = false;
if (source_interp_specified)
*source_interp_specified = false;
if (target_type_specified)
*target_type_specified = false;
if (target_interp_specified)
*target_interp_specified = false;
if (!bpb_length || !bpb)
return type;
@ -2073,10 +2085,14 @@ USHORT API_ROUTINE gds__parse_bpb2(USHORT bpb_length,
switch (op) {
case isc_bpb_source_type:
*source = (USHORT) gds__vax_integer(p, length);
if (source_type_specified)
*source_type_specified = true;
break;
case isc_bpb_target_type:
*target = (USHORT) gds__vax_integer(p, length);
if (target_type_specified)
*target_type_specified = true;
break;
case isc_bpb_type:
@ -2087,11 +2103,15 @@ USHORT API_ROUTINE gds__parse_bpb2(USHORT bpb_length,
case isc_bpb_source_interp:
if (source_interp)
*source_interp = (USHORT) gds__vax_integer(p, length);
if (source_interp_specified)
*source_interp_specified = true;
break;
case isc_bpb_target_interp:
if (target_interp)
*target_interp = (USHORT) gds__vax_integer(p, length);
if (target_interp_specified)
*target_interp_specified = true;
break;
default:

View File

@ -104,7 +104,7 @@ SLONG API_ROUTINE gds__get_prefix(SSHORT, const TEXT*);
ISC_STATUS API_ROUTINE gds__print_status(const ISC_STATUS*);
USHORT API_ROUTINE gds__parse_bpb(USHORT, const UCHAR*, USHORT*, USHORT*);
USHORT API_ROUTINE gds__parse_bpb2(USHORT, const UCHAR*, SSHORT*, SSHORT*,
USHORT*, USHORT*);
USHORT*, USHORT*, bool* = NULL, bool* = NULL, bool* = NULL, bool* = NULL);
SLONG API_ROUTINE gds__ftof(const SCHAR*, const USHORT length1, SCHAR*,
const USHORT length2);
int API_ROUTINE gds__print_blr(const UCHAR*,

View File

@ -2921,7 +2921,7 @@ ISC_STATUS GDS_OPEN_BLOB2(ISC_STATUS* user_status,
jrd_tra* transaction =
find_transaction(tdbb, *tra_handle, isc_segstr_wrong_db);
blb* blob = BLB_open2(tdbb, transaction, blob_id, bpb_length, bpb);
blb* blob = BLB_open2(tdbb, transaction, blob_id, bpb_length, bpb, true);
*blob_handle = blob;
#ifdef REPLAY_OSRI_API_CALLS_SUBSYSTEM

View File

@ -524,7 +524,8 @@ struct blh {
SLONG blh_count; /* Total number of segments */
SLONG blh_length; /* Total length of data */
USHORT blh_sub_type; /* Blob sub-type */
USHORT blh_unused;
UCHAR blh_charset; // Blob charset (since ODS 11.1)
UCHAR blh_unused;
SLONG blh_page[1]; /* Page vector for blob pages */
};