2001-05-23 15:26:42 +02:00
|
|
|
/*
 * The contents of this file are subject to the Interbase Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy
 * of the License at http://www.Inprise.com/IPL.html
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code was created by Inprise Corporation
 * and its predecessors. Portions created by Inprise Corporation are
 * Copyright (C) Inprise Corporation.
 *
 * All Rights Reserved.
 * Contributor(s): ______________________________________.
 */
|
|
|
|
|
2001-07-30 01:43:24 +02:00
|
|
|
#include "firebird.h"
#include "../jrd/ib_stdio.h"
#include <stdlib.h>
#include <string.h>
#include "../jrd/common.h"
|
|
|
|
|
|
|
|
/*
 * One slot of a conversion table.  A slot is addressed by its source
 * value (0..0xFFFF) and records what that value converts to.
 */
typedef struct {
	unsigned int codepoint;		/* Source value this slot describes */
	unsigned int equivilant;	/* Value the source converts to */
	unsigned int exists;		/* Count of times in input table */
	unsigned int not_defined;	/* Defined as NOT DEFINED */
	char name[80];				/* Name of the character */
} CONVERSION;
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
unsigned int high_point;
|
|
|
|
unsigned int low_point;
|
|
|
|
unsigned int count;
|
|
|
|
CONVERSION table[0xFFFF + 1];
|
|
|
|
} TABLE;
|
|
|
|
|
|
|
|
/* Character-set code point -> Unicode; indexed by code point. */
TABLE codepoint_conversion;

/* Unicode -> character-set code point; indexed by Unicode value. */
TABLE unicode_conversion;
|
|
|
|
|
|
|
|
/* Filler value for unmapped slots of the "to_unicode" table. */
#define UNICODE_REPLACEMENT_CHARACTER	0xFFFD

/* Filler value for unmapped slots of the "from_unicode" table. */
#define CANT_MAP_CHARACTER				0
|
|
|
|
|
|
|
|
/*
 * Forward declarations for the routines defined after main().
 *
 * The print_* routines write generated C source through ib_printf;
 * `name` becomes the prefix of the emitted array identifiers.
 */
void print_condensed_indexed_table(char *name,
								   TABLE * table,
								   unsigned short replacement);

void print_indexed_table(char *name,
						 TABLE * table,
						 unsigned short replacement);

void print_direct_table(char *name, TABLE * table);

/* Records one code point <-> Unicode pair in the two global tables. */
void declare(unsigned short codepoint, unsigned short unicode, char *name);
|
|
|
|
|
|
|
|
main(int argc, char *argv[])
|
|
|
|
{
|
|
|
|
IB_FILE *f = NULL;
|
|
|
|
char line[100];
|
|
|
|
int i;
|
|
|
|
int linecount = 0;
|
|
|
|
int option_euc = 0;
|
|
|
|
int option_sjis = 0;
|
|
|
|
int option_condensed = 0;
|
|
|
|
int usage = 0;
|
|
|
|
char *filename = NULL;
|
|
|
|
|
|
|
|
for (i = 1; i < argc && !usage; i++) {
|
|
|
|
if (argv[i][0] == '-') {
|
|
|
|
if (strcmp(argv[i], "-euc") == 0)
|
|
|
|
option_euc++;
|
|
|
|
else if (strcmp(argv[i], "-sjis") == 0)
|
|
|
|
option_sjis++;
|
|
|
|
else if (strcmp(argv[i], "-condense") == 0)
|
|
|
|
option_condensed++;
|
|
|
|
else
|
|
|
|
usage++;
|
|
|
|
}
|
|
|
|
else if (filename)
|
|
|
|
usage++;
|
|
|
|
else
|
|
|
|
filename = argv[i];
|
|
|
|
}
|
|
|
|
|
|
|
|
if (usage) {
|
|
|
|
ib_fprintf(ib_stderr,
|
|
|
|
"usage: mapgen4 [-condense | -euc | -sjis] <name of unicode map file>\n");
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
f = ib_fopen(filename, "r");
|
|
|
|
if (!f) {
|
|
|
|
ib_fprintf(ib_stderr, "Unable to open file '%s'\n", filename);
|
|
|
|
exit(1);
|
|
|
|
};
|
|
|
|
|
|
|
|
memset((void *) &codepoint_conversion, 0, sizeof(codepoint_conversion));
|
|
|
|
memset((void *) &unicode_conversion, 0, sizeof(unicode_conversion));
|
|
|
|
codepoint_conversion.low_point = 0xFFFF;
|
|
|
|
unicode_conversion.low_point = 0xFFFF;
|
|
|
|
|
|
|
|
ib_printf("/* -------------------------------------------- \n");
|
|
|
|
ib_printf(" THIS FILE WAS GENERATED BY intl/mapgen4.c\n");
|
|
|
|
ib_printf(" ");
|
|
|
|
for (i = 0; i < argc; i++)
|
|
|
|
ib_printf("%s ", argv[i]);
|
|
|
|
ib_printf("\n\n");
|
|
|
|
ib_printf(" Unicode mapping table generated from file %s \n",
|
|
|
|
filename);
|
|
|
|
ib_printf("\n");
|
|
|
|
|
|
|
|
|
|
|
|
while (ib_fgets(line, sizeof(line), f)) {
|
|
|
|
unsigned int unicode;
|
|
|
|
unsigned int codepoint;
|
|
|
|
char *p;
|
|
|
|
|
|
|
|
/* Strip trailing control codes from the line */
|
|
|
|
p = &line[strlen(line) - 1];
|
|
|
|
while (p >= line && (*p == '\r' || *p == '\n' || *p == 26))
|
|
|
|
*p-- = 0;
|
|
|
|
|
|
|
|
ib_printf("%s\n", line);
|
|
|
|
linecount++;
|
|
|
|
/* Skip blank lines */
|
|
|
|
if (line[0] == 0)
|
|
|
|
continue;
|
|
|
|
/* Skip comment lines */
|
|
|
|
if (line[0] == '#')
|
|
|
|
continue;
|
|
|
|
p = line;
|
|
|
|
|
|
|
|
if (option_sjis) {
|
|
|
|
unsigned int sjis_point;
|
|
|
|
if (1 != sscanf(p, "0x%04x\t", &sjis_point)) {
|
|
|
|
ib_fprintf(ib_stderr, "Problem in input file - line %d\n",
|
|
|
|
linecount);
|
|
|
|
exit(1);
|
|
|
|
};
|
|
|
|
p = strchr(p, '\t') + 1;
|
|
|
|
}
|
|
|
|
|
|
|
|
codepoint = 0;
|
|
|
|
if (1 != sscanf(p, "0x%04x\t", &codepoint)) {
|
|
|
|
ib_fprintf(ib_stderr, "Problem in input file - line %d\n",
|
|
|
|
linecount);
|
|
|
|
exit(1);
|
|
|
|
};
|
|
|
|
p = strchr(p, '\t') + 1;
|
|
|
|
|
|
|
|
if (option_euc)
|
|
|
|
codepoint += 0x8080;
|
|
|
|
|
|
|
|
unicode = 0;
|
|
|
|
if (0 == sscanf(p, "0x%04x\t", &unicode)) {
|
|
|
|
/* Conversion is not defined */
|
|
|
|
codepoint_conversion.table[codepoint].not_defined++;
|
|
|
|
declare(codepoint, unicode, "NOT DEFINED");
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* Find start of comment defining Unicode name */
|
|
|
|
p = strchr(p, '#');
|
|
|
|
if (p)
|
|
|
|
p++; /* Skip comment character */
|
|
|
|
else
|
|
|
|
p = "";
|
|
|
|
declare(codepoint, unicode, p);
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
ib_fclose(f);
|
|
|
|
ib_printf(" -------------------------------------------- */\n");
|
|
|
|
|
|
|
|
/* Declare any standard ASCII characters that didn't exist in the table */
|
|
|
|
for (i = 0; i <= 0x7f; i++)
|
|
|
|
if (!codepoint_conversion.table[i].exists)
|
|
|
|
declare(i, i, "ASCII");
|
|
|
|
|
|
|
|
ib_printf("\n");
|
|
|
|
ib_printf("\n");
|
|
|
|
ib_printf("#ifndef UNICODE_REPLACEMENT_CHARACTER\n");
|
|
|
|
ib_printf("#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD\n");
|
|
|
|
ib_printf("#endif\n");
|
|
|
|
ib_printf("\n");
|
|
|
|
|
|
|
|
/* Small tables print with all comments */
|
|
|
|
if ((codepoint_conversion.high_point < 256) || (!option_condensed))
|
|
|
|
print_indexed_table("to_unicode", &codepoint_conversion,
|
|
|
|
UNICODE_REPLACEMENT_CHARACTER);
|
|
|
|
else
|
|
|
|
print_condensed_indexed_table("to_unicode", &codepoint_conversion,
|
|
|
|
UNICODE_REPLACEMENT_CHARACTER);
|
|
|
|
|
|
|
|
|
|
|
|
ib_printf("\n");
|
|
|
|
ib_printf("\n");
|
|
|
|
ib_printf("#ifndef CANT_MAP_CHARACTER\n");
|
|
|
|
ib_printf("#define CANT_MAP_CHARACTER 0\n");
|
|
|
|
ib_printf("#endif\n");
|
|
|
|
ib_printf("\n");
|
|
|
|
|
|
|
|
/* Small tables print with all comments */
|
|
|
|
if ((unicode_conversion.count < 256) || (!option_condensed))
|
|
|
|
print_indexed_table("from_unicode", &unicode_conversion,
|
|
|
|
CANT_MAP_CHARACTER);
|
|
|
|
else
|
|
|
|
print_condensed_indexed_table("from_unicode", &unicode_conversion,
|
|
|
|
CANT_MAP_CHARACTER);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
void declare(unsigned short codepoint, unsigned short unicode, char *name)
|
|
|
|
{
|
|
|
|
if (codepoint < codepoint_conversion.low_point)
|
|
|
|
codepoint_conversion.low_point = codepoint;
|
|
|
|
if (codepoint > codepoint_conversion.high_point)
|
|
|
|
codepoint_conversion.high_point = codepoint;
|
|
|
|
|
|
|
|
if (codepoint_conversion.table[codepoint].exists++) {
|
|
|
|
if (unicode != codepoint_conversion.table[codepoint].equivilant)
|
|
|
|
ib_fprintf(ib_stderr,
|
|
|
|
"Error: duplicate unequal mappings for 0x%04x : 0x%04x and 0x%04x\n",
|
|
|
|
codepoint, unicode,
|
|
|
|
codepoint_conversion.table[codepoint].equivilant);
|
|
|
|
else
|
|
|
|
ib_fprintf(ib_stderr,
|
|
|
|
"Warning: duplicate entries for codepoint 0x%04x to 0x%04x\n",
|
|
|
|
codepoint, unicode);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
codepoint_conversion.count++;
|
|
|
|
|
|
|
|
codepoint_conversion.table[codepoint].codepoint = codepoint;
|
|
|
|
strcpy(codepoint_conversion.table[codepoint].name, name);
|
|
|
|
|
|
|
|
if (!codepoint_conversion.table[codepoint].not_defined) {
|
|
|
|
codepoint_conversion.table[codepoint].equivilant = unicode;
|
|
|
|
|
|
|
|
if (unicode < unicode_conversion.low_point)
|
|
|
|
unicode_conversion.low_point = unicode;
|
|
|
|
if (unicode > unicode_conversion.high_point)
|
|
|
|
codepoint_conversion.high_point = unicode;
|
|
|
|
if (!unicode_conversion.table[unicode].exists++) {
|
|
|
|
unicode_conversion.table[unicode].codepoint = unicode;
|
|
|
|
if (unicode != UNICODE_REPLACEMENT_CHARACTER) {
|
|
|
|
unicode_conversion.table[unicode].equivilant = codepoint;
|
|
|
|
strcpy(unicode_conversion.table[unicode].name, name);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (unicode != UNICODE_REPLACEMENT_CHARACTER)
|
|
|
|
ib_fprintf(ib_stderr,
|
|
|
|
"Warning: Multiple mappings to Unicode 0x%04x : 0x%04x and 0x%04x\n",
|
|
|
|
unicode, codepoint,
|
|
|
|
unicode_conversion.table[unicode].equivilant);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
 * Print the first 256 slots of `table` as a flat C array named
 * "<name>_map".  A slot is printed with its equivalent and name only if
 * it was declared exactly once and is not flagged not_defined; every
 * other slot is printed as UNICODE_REPLACEMENT_CHARACTER.
 */
void print_direct_table(char *name, TABLE * table)
{
	int slot;

	ib_printf("static CONST USHORT %s_map[256] = {\n", name);

	for (slot = 0; slot < 256; slot++) {
		const CONVERSION *entry = &table->table[slot];

		if (entry->exists == 1 && !entry->not_defined)
			ib_printf("/* %02X */ 0x%04X /* %45s */,\n",
					  slot, entry->equivilant, entry->name);
		else
			ib_printf("/* %02X */ UNICODE_REPLACEMENT_CHARACTER,\n", slot);
	}

	ib_printf("};\n");
}
|
|
|
|
|
|
|
|
|
|
|
|
void print_indexed_table(char *name, TABLE * table,
|
|
|
|
unsigned short replacement)
|
|
|
|
{
|
|
|
|
unsigned int index;
|
|
|
|
unsigned int codepoint;
|
|
|
|
unsigned short upper_byte[256];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
ib_printf("static CONST USHORT FAR_VARIABLE %s_mapping_array[] = {\n",
|
|
|
|
name);
|
|
|
|
for (index = 0; index < 256; index++)
|
|
|
|
if (replacement == UNICODE_REPLACEMENT_CHARACTER)
|
|
|
|
ib_printf
|
|
|
|
("/* U+XX%02X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
|
|
|
|
index % 256, index);
|
|
|
|
else
|
|
|
|
ib_printf("/* U+XX%02X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
|
|
|
|
index % 256, index);
|
|
|
|
|
|
|
|
memset(upper_byte, 0, sizeof(upper_byte));
|
|
|
|
codepoint = 0;
|
|
|
|
while (codepoint < 0xFFFF + 1) {
|
|
|
|
if (!table->table[codepoint].exists) {
|
|
|
|
codepoint++;
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
upper_byte[codepoint / 256] = index;
|
|
|
|
while ((index % 256) < (codepoint % 256)) {
|
|
|
|
if (replacement == UNICODE_REPLACEMENT_CHARACTER)
|
|
|
|
ib_printf
|
|
|
|
("/* U+%04X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
|
|
|
|
(codepoint & 0xFF00) + (index % 256), index);
|
|
|
|
else
|
|
|
|
ib_printf("/* U+%04X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
|
|
|
|
(codepoint & 0xFF00) + (index % 256), index);
|
|
|
|
index++;
|
|
|
|
}
|
|
|
|
do {
|
|
|
|
if (!table->table[codepoint].exists) {
|
|
|
|
if (replacement == UNICODE_REPLACEMENT_CHARACTER)
|
|
|
|
ib_printf
|
|
|
|
("/* U+%04X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
|
|
|
|
codepoint, index);
|
|
|
|
else
|
|
|
|
ib_printf("/* U+%04X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
|
|
|
|
codepoint, index);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
ib_printf("/* U+%04X */\t0x%04X\t/* %45s */,/* %d */\n",
|
|
|
|
codepoint, table->table[codepoint].equivilant,
|
|
|
|
table->table[codepoint].name, index);
|
|
|
|
index++;
|
|
|
|
codepoint++;
|
|
|
|
}
|
|
|
|
while (index % 256);
|
|
|
|
}
|
|
|
|
ib_printf("\t0 /* END OF MAP TABLE */\n");
|
|
|
|
ib_printf("};\n");
|
|
|
|
|
|
|
|
ib_printf("static CONST USHORT %s_map[256] = {\n", name);
|
|
|
|
for (i = 0; i < 256; i++)
|
|
|
|
ib_printf("/* U+%02X-- */\t%d,\n", i, upper_byte[i]);
|
|
|
|
ib_printf("};\n");
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void print_condensed_indexed_table(
|
|
|
|
char *name,
|
|
|
|
TABLE * table, unsigned short replacement)
|
|
|
|
{
|
|
|
|
unsigned int index;
|
|
|
|
unsigned int codepoint;
|
|
|
|
unsigned short upper_byte[256];
|
|
|
|
int i;
|
|
|
|
|
|
|
|
ib_printf("static CONST USHORT FAR_VARIABLE %s_mapping_array[] = {\n",
|
|
|
|
name);
|
|
|
|
|
|
|
|
ib_printf("\n");
|
|
|
|
ib_printf("/* %5d to %5d */\n", 0, 255);
|
|
|
|
for (index = 0; index < 256; index++) {
|
|
|
|
if (replacement)
|
|
|
|
ib_printf("0x%04X,", replacement);
|
|
|
|
else
|
|
|
|
ib_printf("0, ");
|
|
|
|
if (index % 8 == 7)
|
|
|
|
ib_printf("\n");
|
|
|
|
else
|
|
|
|
ib_printf(" ");
|
|
|
|
}
|
|
|
|
|
|
|
|
memset(upper_byte, 0, sizeof(upper_byte));
|
|
|
|
codepoint = 0;
|
|
|
|
while (codepoint < 0xFFFF + 1) {
|
|
|
|
if (!table->table[codepoint].exists) {
|
|
|
|
codepoint++;
|
|
|
|
continue;
|
|
|
|
};
|
|
|
|
upper_byte[codepoint / 256] = index;
|
|
|
|
|
|
|
|
ib_printf("\n");
|
|
|
|
ib_printf("/* %5d to %5d */\n", index, index + 255);
|
|
|
|
|
|
|
|
while ((index % 256) < (codepoint % 256)) {
|
|
|
|
if (replacement)
|
|
|
|
ib_printf("0x%04X,", replacement);
|
|
|
|
else
|
|
|
|
ib_printf("0, ");
|
|
|
|
if (index % 8 == 7)
|
|
|
|
ib_printf("\n");
|
|
|
|
else
|
|
|
|
ib_printf(" ");
|
|
|
|
index++;
|
|
|
|
}
|
|
|
|
do {
|
|
|
|
if (!table->table[codepoint].exists) {
|
|
|
|
if (replacement)
|
|
|
|
ib_printf("0x%04X,", replacement);
|
|
|
|
else
|
|
|
|
ib_printf("0, ");
|
|
|
|
}
|
|
|
|
else
|
|
|
|
ib_printf("0x%04X,", table->table[codepoint].equivilant);
|
|
|
|
if (index % 8 == 7)
|
|
|
|
ib_printf("\n");
|
|
|
|
else
|
|
|
|
ib_printf(" ");
|
|
|
|
index++;
|
|
|
|
codepoint++;
|
|
|
|
}
|
|
|
|
while (index % 256);
|
|
|
|
}
|
|
|
|
ib_printf("\t0 /* END OF MAP TABLE */\n");
|
|
|
|
ib_printf("};\n");
|
|
|
|
|
|
|
|
ib_printf("static CONST USHORT %s_map[256] = {\n", name);
|
|
|
|
for (i = 0; i < 256; i++)
|
|
|
|
ib_printf("%d,\n", upper_byte[i]);
|
|
|
|
ib_printf("};\n");
|
|
|
|
|
|
|
|
}
|