From e1cb23f9860ab8f185bcc228c85ce1e60e97408a Mon Sep 17 00:00:00 2001 From: dimitr Date: Wed, 7 Jul 2010 17:39:47 +0000 Subject: [PATCH] Backported CORE-2122: Translation of large text BLOB between UNICODE_FSS (UTF8) and other charsets. --- src/jrd/filters.cpp | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/src/jrd/filters.cpp b/src/jrd/filters.cpp index 97dab88543..8e2a0427cd 100644 --- a/src/jrd/filters.cpp +++ b/src/jrd/filters.cpp @@ -909,7 +909,7 @@ ISC_STATUS filter_transliterate_text(USHORT action, BlobControl* control) /* Always keep a minimal count of bytes in the input buffer, * to prevent the case of truncated characters. */ - if (length < 3) + if (length < 4) can_use_more = true; } @@ -920,13 +920,14 @@ ISC_STATUS filter_transliterate_text(USHORT action, BlobControl* control) (We don't want to blindly keep topping off this buffer if we already have more than we can use) */ - if (!length || can_use_more - && (aux->ctlaux_source_blob_status == isc_segment)) + USHORT bytes_read_from_source = 0; + + if (!length || can_use_more) +// && (aux->ctlaux_source_blob_status == isc_segment)) { // Get a segment, or partial segment, from the source // into the temporary buffer - USHORT bytes_read_from_source = 0; status = caller(isc_blob_filter_get_segment, control, (USHORT) MIN((aux->ctlaux_buffer1_len - length), control->ctl_buffer_length), @@ -960,10 +961,17 @@ ISC_STATUS filter_transliterate_text(USHORT action, BlobControl* control) return isc_transliteration_failed; } - if (err_position < length) { - /* Bad input *might* be due to input buffer truncation in the middle - of a character, so shuffle bytes, add some more data, and try again. - If we already tried that then it's really some bad input */ + if (err_position == 0 && bytes_read_from_source != 0 && length != 0 && length < 4) { + // We don't have sufficient bytes to always transliterate a character. + // A bad input on the first character is unrecoverable, so we cache + // the bytes for the next read. + result_length = 0; + } + else if (err_position < length) + { + // Bad input *might* be due to input buffer truncation in the middle + // of a character, so shuffle bytes, add some more data, and try again. + // If we already tried that then it's really some bad input. if (err_position == 0) return isc_transliteration_failed;