8
0
mirror of https://github.com/FirebirdSQL/firebird.git synced 2025-01-31 22:43:04 +01:00
firebird-mirror/src/intl/mapgen4.c

417 lines
11 KiB
C
Raw Normal View History

2001-05-23 15:26:42 +02:00
/*
* The contents of this file are subject to the Interbase Public
* License Version 1.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy
* of the License at http://www.Inprise.com/IPL.html
*
* Software distributed under the License is distributed on an
* "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
* or implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code was created by Inprise Corporation
* and its predecessors. Portions created by Inprise Corporation are
* Copyright (C) Inprise Corporation.
*
* All Rights Reserved.
* Contributor(s): ______________________________________.
*/
#include "firebird.h"
2001-05-23 15:26:42 +02:00
#include "../jrd/ib_stdio.h"
#include <string.h>
#include "../jrd/common.h"
typedef struct {
unsigned int codepoint;
unsigned int equivilant;
unsigned int exists; /* Count of times in input table */
unsigned int not_defined; /* Defined as NOT DEFINED */
char name[80]; /* Name of the character */
} CONVERSION;
/*
 * A full conversion table with one CONVERSION slot for every 16-bit index
 * (0x0000 through 0xFFFF), plus summary statistics kept by declare().
 */
typedef struct {
/* Intended to hold the largest index declared so far */
unsigned int high_point;
/* Smallest index declared so far; main() seeds it with 0xFFFF */
unsigned int low_point;
/* Count of distinct slots declared; declare() is responsible for it */
unsigned int count;
/* Direct-indexed slots; unused slots stay zeroed (exists == 0) */
CONVERSION table[0xFFFF + 1];
} TABLE;
/* Forward table: indexed by character-set codepoint, maps to Unicode */
TABLE codepoint_conversion;
/* Reverse table: indexed by Unicode value, maps to character-set codepoint */
TABLE unicode_conversion;
/* Filler passed to the table printers for the "to_unicode" output */
#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD
/* Filler passed to the table printers for the "from_unicode" output */
#define CANT_MAP_CHARACTER 0
/*
 * Table printers. Each writes C source for <name>_mapping_array and/or
 * <name>_map to standard output; `replacement` selects the filler emitted
 * for slots that were never declared.
 */
/* Compact form: bare values, eight per line, no per-entry comments */
void print_condensed_indexed_table(char *name, TABLE * table,
unsigned short replacement);
/* Verbose form: one entry per line with codepoint/name comments */
void print_indexed_table(char *name, TABLE * table,
unsigned short replacement);
/* Flat 256-entry <name>_map table covering indexes 0x00..0xFF only */
void print_direct_table(char *name, TABLE * table);
/* Record one codepoint <-> unicode pair in both global tables */
void declare(unsigned short codepoint, unsigned short unicode, char *name);
/* Report a malformed input line on stderr and terminate with status 1. */
static void bad_input_line(int linecount)
{
	ib_fprintf(ib_stderr, "Problem in input file - line %d\n", linecount);
	exit(1);
}

/*
 * Return a pointer to the character following the next tab in `p`.
 * A line without the expected tab separator is reported as a bad input
 * line instead of being dereferenced through a NULL-derived pointer.
 */
static char *next_column(char *p, int linecount)
{
	char *tab = strchr(p, '\t');

	if (!tab)
		bad_input_line(linecount);
	return tab + 1;
}

/*
 * mapgen4 - generate C source for Unicode mapping tables.
 *
 * Reads a tab-separated map file whose data lines look like
 *     0x<codepoint> TAB 0x<unicode> TAB # <character name>
 * (blank lines and lines starting with '#' are skipped), echoes the file
 * inside a C comment, and then prints a "to_unicode" and a "from_unicode"
 * table set on standard output.
 *
 * Options:
 *   -sjis      each data line has an extra leading 0x column, which is
 *              parsed and discarded
 *   -euc       0x8080 is added to every codepoint read
 *   -condense  large tables are printed in the compact format
 *
 * Returns 0 on success.  Exits with status 1 on a usage error, when the
 * file cannot be opened, or on a malformed / out-of-range input line.
 */
int main(int argc, char *argv[])
{
	IB_FILE *f = NULL;
	char line[100];
	int i;
	int linecount = 0;
	int option_euc = 0;
	int option_sjis = 0;
	int option_condensed = 0;
	int usage = 0;
	char *filename = NULL;

	for (i = 1; i < argc && !usage; i++) {
		if (argv[i][0] == '-') {
			if (strcmp(argv[i], "-euc") == 0)
				option_euc++;
			else if (strcmp(argv[i], "-sjis") == 0)
				option_sjis++;
			else if (strcmp(argv[i], "-condense") == 0)
				option_condensed++;
			else
				usage++;
		}
		else if (filename)
			usage++;
		else
			filename = argv[i];
	}

	/* The map file name is mandatory; without it ib_fopen() would get NULL */
	if (usage || !filename) {
		ib_fprintf(ib_stderr,
				   "usage: mapgen4 [-condense | -euc | -sjis] <name of unicode map file>\n");
		exit(1);
	}

	f = ib_fopen(filename, "r");
	if (!f) {
		ib_fprintf(ib_stderr, "Unable to open file '%s'\n", filename);
		exit(1);
	}

	memset((void *) &codepoint_conversion, 0, sizeof(codepoint_conversion));
	memset((void *) &unicode_conversion, 0, sizeof(unicode_conversion));
	/* Start low_point at the maximum so the first declare() lowers it */
	codepoint_conversion.low_point = 0xFFFF;
	unicode_conversion.low_point = 0xFFFF;

	ib_printf("/* -------------------------------------------- \n");
	ib_printf(" THIS FILE WAS GENERATED BY intl/mapgen4.c\n");
	ib_printf(" ");
	for (i = 0; i < argc; i++)
		ib_printf("%s ", argv[i]);
	ib_printf("\n\n");
	ib_printf(" Unicode mapping table generated from file %s \n",
			  filename);
	ib_printf("\n");

	while (ib_fgets(line, sizeof(line), f)) {
		unsigned int unicode;
		unsigned int codepoint;
		char *p;
		size_t len;

		/* Strip trailing control codes (CR, LF, Ctrl-Z) from the line */
		len = strlen(line);
		while (len > 0 && (line[len - 1] == '\r' ||
						   line[len - 1] == '\n' || line[len - 1] == 26))
			line[--len] = 0;

		ib_printf("%s\n", line);
		linecount++;

		/* Skip blank lines */
		if (line[0] == 0)
			continue;
		/* Skip comment lines */
		if (line[0] == '#')
			continue;

		p = line;
		if (option_sjis) {
			/* Leading column is validated but its value is not used */
			unsigned int sjis_point;

			if (1 != sscanf(p, "0x%04x\t", &sjis_point))
				bad_input_line(linecount);
			p = next_column(p, linecount);
		}

		codepoint = 0;
		if (1 != sscanf(p, "0x%04x\t", &codepoint))
			bad_input_line(linecount);
		p = next_column(p, linecount);

		if (option_euc)
			codepoint += 0x8080;

		/*
		 * The tables hold 0x10000 slots and declare() takes a 16-bit
		 * codepoint, so a value pushed past 0xFFFF by the EUC offset
		 * must be rejected rather than indexed or truncated.
		 */
		if (codepoint > 0xFFFF)
			bad_input_line(linecount);

		unicode = 0;
		/*
		 * sscanf() yields 0 on a matching failure and EOF when the
		 * column is empty; both mean no Unicode value was supplied.
		 */
		if (1 != sscanf(p, "0x%04x\t", &unicode)) {
			/* Conversion is not defined */
			codepoint_conversion.table[codepoint].not_defined++;
			declare(codepoint, unicode, "NOT DEFINED");
		}
		else {
			/* Find start of comment defining Unicode name */
			p = strchr(p, '#');
			if (p)
				p++;			/* Skip comment character */
			else
				p = "";
			declare(codepoint, unicode, p);
		}
	}
	ib_fclose(f);
	ib_printf(" -------------------------------------------- */\n");

	/* Declare any standard ASCII characters that didn't exist in the table */
	for (i = 0; i <= 0x7f; i++)
		if (!codepoint_conversion.table[i].exists)
			declare(i, i, "ASCII");

	ib_printf("\n");
	ib_printf("\n");
	ib_printf("#ifndef UNICODE_REPLACEMENT_CHARACTER\n");
	ib_printf("#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD\n");
	ib_printf("#endif\n");
	ib_printf("\n");

	/* Small tables print with all comments */
	if ((codepoint_conversion.high_point < 256) || (!option_condensed))
		print_indexed_table("to_unicode", &codepoint_conversion,
							UNICODE_REPLACEMENT_CHARACTER);
	else
		print_condensed_indexed_table("to_unicode", &codepoint_conversion,
									  UNICODE_REPLACEMENT_CHARACTER);

	ib_printf("\n");
	ib_printf("\n");
	ib_printf("#ifndef CANT_MAP_CHARACTER\n");
	ib_printf("#define CANT_MAP_CHARACTER 0\n");
	ib_printf("#endif\n");
	ib_printf("\n");

	/* Small tables print with all comments */
	if ((unicode_conversion.count < 256) || (!option_condensed))
		print_indexed_table("from_unicode", &unicode_conversion,
							CANT_MAP_CHARACTER);
	else
		print_condensed_indexed_table("from_unicode", &unicode_conversion,
									  CANT_MAP_CHARACTER);
	return 0;
}
void declare(unsigned short codepoint, unsigned short unicode, char *name)
{
if (codepoint < codepoint_conversion.low_point)
codepoint_conversion.low_point = codepoint;
if (codepoint > codepoint_conversion.high_point)
codepoint_conversion.high_point = codepoint;
if (codepoint_conversion.table[codepoint].exists++) {
if (unicode != codepoint_conversion.table[codepoint].equivilant)
ib_fprintf(ib_stderr,
"Error: duplicate unequal mappings for 0x%04x : 0x%04x and 0x%04x\n",
codepoint, unicode,
codepoint_conversion.table[codepoint].equivilant);
else
ib_fprintf(ib_stderr,
"Warning: duplicate entries for codepoint 0x%04x to 0x%04x\n",
codepoint, unicode);
}
else
codepoint_conversion.count++;
codepoint_conversion.table[codepoint].codepoint = codepoint;
strcpy(codepoint_conversion.table[codepoint].name, name);
if (!codepoint_conversion.table[codepoint].not_defined) {
codepoint_conversion.table[codepoint].equivilant = unicode;
if (unicode < unicode_conversion.low_point)
unicode_conversion.low_point = unicode;
if (unicode > unicode_conversion.high_point)
codepoint_conversion.high_point = unicode;
if (!unicode_conversion.table[unicode].exists++) {
unicode_conversion.table[unicode].codepoint = unicode;
if (unicode != UNICODE_REPLACEMENT_CHARACTER) {
unicode_conversion.table[unicode].equivilant = codepoint;
strcpy(unicode_conversion.table[unicode].name, name);
}
}
else {
if (unicode != UNICODE_REPLACEMENT_CHARACTER)
ib_fprintf(ib_stderr,
"Warning: Multiple mappings to Unicode 0x%04x : 0x%04x and 0x%04x\n",
unicode, codepoint,
unicode_conversion.table[unicode].equivilant);
}
};
}
/*
 * Print a flat 256-entry "<name>_map" array covering indexes 0x00..0xFF.
 *
 * A slot is printed with its mapped value and character name only when it
 * was declared exactly once and is not flagged as undefined; every other
 * slot is printed as UNICODE_REPLACEMENT_CHARACTER.
 */
void print_direct_table(char *name, TABLE * table)
{
	const CONVERSION *entry = table->table;
	int slot = 0;

	ib_printf("static CONST USHORT %s_map[256] = {\n", name);
	while (slot < 256) {
		if (entry->exists == 1 && !entry->not_defined)
			ib_printf("/* %02X */ 0x%04X /* %45s */,\n",
					  slot, entry->equivilant, entry->name);
		else
			ib_printf("/* %02X */ UNICODE_REPLACEMENT_CHARACTER,\n", slot);
		slot++;
		entry++;
	}
	ib_printf("};\n");
}
/*
 * Print a two-level lookup table with one commented entry per line.
 *
 * Output consists of:
 *   <name>_mapping_array[]  a leading block of 256 filler entries followed
 *                           by one 256-entry block for every page (upper
 *                           byte) of `table` that has at least one declared
 *                           slot, then a terminating 0;
 *   <name>_map[256]         for each upper byte, the array position where
 *                           its block starts (0, i.e. the filler block, for
 *                           pages with no declared slot).
 *
 * `replacement` selects the filler text: UNICODE_REPLACEMENT_CHARACTER when
 * it equals that constant, CANT_MAP_CHARACTER otherwise.
 */
void print_indexed_table(char *name, TABLE * table,
						 unsigned short replacement)
{
	const int unicode_filler =
		(replacement == UNICODE_REPLACEMENT_CHARACTER);
	unsigned short block_start[256];
	unsigned int position = 0;	/* running position in the emitted array */
	unsigned int page;
	unsigned int offset;
	int i;

	ib_printf("static CONST USHORT FAR_VARIABLE %s_mapping_array[] = {\n",
			  name);

	/* Block 0: the shared all-filler block used by every empty page */
	for (; position < 256; position++) {
		if (unicode_filler)
			ib_printf
				("/* U+XX%02X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
				 position % 256, position);
		else
			ib_printf("/* U+XX%02X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
					  position % 256, position);
	}

	memset(block_start, 0, sizeof(block_start));

	for (page = 0; page < 256; page++) {
		const CONVERSION *row = &table->table[page * 256];

		/* A page earns its own block only if some slot in it was declared */
		offset = 0;
		while (offset < 256 && !row[offset].exists)
			offset++;
		if (offset == 256)
			continue;

		block_start[page] = position;

		/* Emit the whole page; undeclared slots get the filler text */
		for (offset = 0; offset < 256; offset++, position++) {
			unsigned int codepoint = page * 256 + offset;

			if (row[offset].exists)
				ib_printf("/* U+%04X */\t0x%04X\t/* %45s */,/* %d */\n",
						  codepoint, row[offset].equivilant,
						  row[offset].name, position);
			else if (unicode_filler)
				ib_printf
					("/* U+%04X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
					 codepoint, position);
			else
				ib_printf("/* U+%04X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
						  codepoint, position);
		}
	}

	ib_printf("\t0 /* END OF MAP TABLE */\n");
	ib_printf("};\n");

	ib_printf("static CONST USHORT %s_map[256] = {\n", name);
	for (i = 0; i < 256; i++)
		ib_printf("/* U+%02X-- */\t%d,\n", i, block_start[i]);
	ib_printf("};\n");
}
/*
 * Print a two-level lookup table in compact form: bare values, eight per
 * line, with only a range comment in front of each 256-entry block.
 *
 * The layout matches the verbose printer: <name>_mapping_array[] starts
 * with a block of 256 filler entries, followed by one 256-entry block per
 * page (upper byte) of `table` holding at least one declared slot and a
 * terminating 0; <name>_map[256] gives each page's starting position in
 * that array (0 for pages with no declared slot).
 *
 * Filler entries are printed as the hexadecimal `replacement` value, or as
 * a plain "0" when `replacement` is zero.
 */
void print_condensed_indexed_table(
	char *name,
	TABLE * table, unsigned short replacement)
{
	unsigned short block_start[256];
	unsigned int position = 0;	/* running position in the emitted array */
	unsigned int page;
	unsigned int offset;
	int i;

	ib_printf("static CONST USHORT FAR_VARIABLE %s_mapping_array[] = {\n",
			  name);

	/* Block 0: the shared all-filler block used by every empty page */
	ib_printf("\n");
	ib_printf("/* %5d to %5d */\n", 0, 255);
	for (; position < 256; position++) {
		if (replacement)
			ib_printf("0x%04X,", replacement);
		else
			ib_printf("0, ");
		/* Break the line after every eighth value */
		ib_printf((position % 8 == 7) ? "\n" : " ");
	}

	memset(block_start, 0, sizeof(block_start));

	for (page = 0; page < 256; page++) {
		const CONVERSION *row = &table->table[page * 256];

		/* A page earns its own block only if some slot in it was declared */
		offset = 0;
		while (offset < 256 && !row[offset].exists)
			offset++;
		if (offset == 256)
			continue;

		block_start[page] = position;
		ib_printf("\n");
		ib_printf("/* %5d to %5d */\n", position, position + 255);

		/* Emit the whole page; undeclared slots get the filler value */
		for (offset = 0; offset < 256; offset++, position++) {
			if (row[offset].exists)
				ib_printf("0x%04X,", row[offset].equivilant);
			else if (replacement)
				ib_printf("0x%04X,", replacement);
			else
				ib_printf("0, ");
			ib_printf((position % 8 == 7) ? "\n" : " ");
		}
	}

	ib_printf("\t0 /* END OF MAP TABLE */\n");
	ib_printf("};\n");

	ib_printf("static CONST USHORT %s_map[256] = {\n", name);
	for (i = 0; i < 256; i++)
		ib_printf("%d,\n", block_start[i]);
	ib_printf("};\n");
}