/*
 * The contents of this file are subject to the Interbase Public
 * License Version 1.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy
 * of the License at http://www.Inprise.com/IPL.html
 *
 * Software distributed under the License is distributed on an
 * "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express
 * or implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code was created by Inprise Corporation
 * and its predecessors. Portions created by Inprise Corporation are
 * Copyright (C) Inprise Corporation.
 *
 * All Rights Reserved.
 * Contributor(s): ______________________________________.
 */

#include "firebird.h"
#include "../jrd/ib_stdio.h"
#include <stdlib.h>
#include <string.h>
#include "../jrd/common.h"

/* One entry of a conversion table, indexed by the source character. */
typedef struct {
	unsigned int codepoint;
	unsigned int equivilant;
	unsigned int exists;		/* Count of times in input table */
	unsigned int not_defined;	/* Defined as NOT DEFINED */
	char name[80];				/* Name of the character */
} CONVERSION;

/* A full 16-bit conversion table plus summary statistics. */
typedef struct {
	unsigned int high_point;
	unsigned int low_point;
	unsigned int count;
	CONVERSION table[0xFFFF + 1];
} TABLE;

TABLE codepoint_conversion;		/* indexed by character-set codepoint */
TABLE unicode_conversion;		/* indexed by Unicode value */

#define UNICODE_REPLACEMENT_CHARACTER	0xFFFD
#define CANT_MAP_CHARACTER				0

void print_condensed_indexed_table(char *name, TABLE * table,
								   unsigned short replacement);
void print_indexed_table(char *name, TABLE * table,
						 unsigned short replacement);
void print_direct_table(char *name, TABLE * table);
void declare(unsigned short codepoint, unsigned short unicode, char *name);


/*
 * Copy src into dst, truncating so that at most dst_size - 1 characters
 * are stored, and always NUL-terminate.  Character names come from input
 * lines that can be longer than the name field of CONVERSION.
 */
static void copy_name(char *dst, size_t dst_size, const char *src)
{
	size_t len = strlen(src);

	if (len >= dst_size)
		len = dst_size - 1;
	memcpy(dst, src, len);
	dst[len] = 0;
}


/*
 * Return a pointer just past the first tab in p, or NULL when p contains
 * no tab.
 */
static char *next_field(char *p)
{
	char *tab = strchr(p, '\t');

	return tab ? tab + 1 : NULL;
}


/* Report a malformed input line on stderr and terminate with status 1. */
static void input_problem(int linecount)
{
	ib_fprintf(ib_stderr, "Problem in input file - line %d\n", linecount);
	exit(1);
}


/*
 * Read a tab-separated mapping file (hex codepoint, hex Unicode value,
 * optional '#' comment naming the character) and print C source for two
 * lookup tables on stdout: "to_unicode" and "from_unicode".
 *
 * Options:
 *   -euc       add 0x8080 to every codepoint read from the file
 *   -sjis      parse and discard the first column; the codepoint is then
 *              taken from the second column
 *   -condense  print tables that are not small without per-entry comments
 *
 * Returns 0 on success; exits with status 1 on usage or input errors.
 */
int main(int argc, char *argv[])
{
	IB_FILE *f = NULL;
	char line[100];
	int i;
	int linecount = 0;
	int option_euc = 0;
	int option_sjis = 0;
	int option_condensed = 0;
	int usage = 0;
	char *filename = NULL;

	for (i = 1; i < argc && !usage; i++) {
		if (argv[i][0] == '-') {
			if (strcmp(argv[i], "-euc") == 0)
				option_euc++;
			else if (strcmp(argv[i], "-sjis") == 0)
				option_sjis++;
			else if (strcmp(argv[i], "-condense") == 0)
				option_condensed++;
			else
				usage++;
		}
		else if (filename)
			usage++;
		else
			filename = argv[i];
	}

	/* An input file is required; a NULL name must never reach ib_fopen. */
	if (usage || !filename) {
		ib_fprintf(ib_stderr,
				   "usage: mapgen4 [-condense | -euc | -sjis] \n");
		exit(1);
	}

	f = ib_fopen(filename, "r");
	if (!f) {
		ib_fprintf(ib_stderr, "Unable to open file '%s'\n", filename);
		exit(1);
	}

	memset((void *) &codepoint_conversion, 0, sizeof(codepoint_conversion));
	memset((void *) &unicode_conversion, 0, sizeof(unicode_conversion));
	codepoint_conversion.low_point = 0xFFFF;
	unicode_conversion.low_point = 0xFFFF;

	/* The input file is echoed inside a comment at the top of the output. */
	ib_printf("/* -------------------------------------------- \n");
	ib_printf(" THIS FILE WAS GENERATED BY intl/mapgen4.c\n");
	ib_printf(" ");
	for (i = 0; i < argc; i++)
		ib_printf("%s ", argv[i]);
	ib_printf("\n\n");
	ib_printf(" Unicode mapping table generated from file %s \n", filename);
	ib_printf("\n");

	while (ib_fgets(line, sizeof(line), f)) {
		unsigned int unicode;
		unsigned int codepoint;
		char *p;
		size_t len;

		/* Strip trailing control codes (CR, LF, Ctrl-Z) from the line.
		   Counting down a length avoids forming a pointer before the
		   start of the buffer when the line is empty. */
		len = strlen(line);
		while (len > 0 &&
			   (line[len - 1] == '\r' || line[len - 1] == '\n' ||
				line[len - 1] == 26))
			line[--len] = 0;

		ib_printf("%s\n", line);
		linecount++;

		/* Skip blank lines */
		if (line[0] == 0)
			continue;

		/* Skip comment lines */
		if (line[0] == '#')
			continue;

		p = line;

		if (option_sjis) {
			/* The first column is validated but its value is not used */
			unsigned int sjis_point;

			if (1 != sscanf(p, "0x%04x\t", &sjis_point))
				input_problem(linecount);
			p = next_field(p);
			if (!p)
				input_problem(linecount);
		}

		codepoint = 0;
		if (1 != sscanf(p, "0x%04x\t", &codepoint))
			input_problem(linecount);

		/* The sscanf format accepts any whitespace after the number, but
		   the columns must be separated by a real tab to be located. */
		p = next_field(p);
		if (!p)
			input_problem(linecount);

		if (option_euc)
			codepoint += 0x8080;

		/* The tables hold 0x10000 entries; reject anything beyond that
		   rather than indexing out of bounds. */
		if (codepoint > 0xFFFF)
			input_problem(linecount);

		unicode = 0;
		if (1 != sscanf(p, "0x%04x\t", &unicode)) {
			/* Conversion is not defined.  This covers both a column that
			   does not parse (sscanf returns 0) and an empty remainder
			   (sscanf returns EOF). */
			codepoint_conversion.table[codepoint].not_defined++;
			declare(codepoint, unicode, "NOT DEFINED");
		}
		else {
			/* Find start of comment defining Unicode name */
			p = strchr(p, '#');
			if (p)
				p++;			/* Skip comment character */
			else
				p = "";
			declare(codepoint, unicode, p);
		}
	}

	ib_fclose(f);

	ib_printf(" -------------------------------------------- */\n");

	/* Declare any standard ASCII characters that didn't exist in the table */
	for (i = 0; i <= 0x7f; i++)
		if (!codepoint_conversion.table[i].exists)
			declare(i, i, "ASCII");

	ib_printf("\n");
	ib_printf("\n");
	ib_printf("#ifndef UNICODE_REPLACEMENT_CHARACTER\n");
	ib_printf("#define UNICODE_REPLACEMENT_CHARACTER 0xFFFD\n");
	ib_printf("#endif\n");
	ib_printf("\n");

	/* Small tables print with all comments */
	if ((codepoint_conversion.high_point < 256) || (!option_condensed))
		print_indexed_table("to_unicode", &codepoint_conversion,
							UNICODE_REPLACEMENT_CHARACTER);
	else
		print_condensed_indexed_table("to_unicode", &codepoint_conversion,
									  UNICODE_REPLACEMENT_CHARACTER);

	ib_printf("\n");
	ib_printf("\n");
	ib_printf("#ifndef CANT_MAP_CHARACTER\n");
	ib_printf("#define CANT_MAP_CHARACTER 0\n");
	ib_printf("#endif\n");
	ib_printf("\n");

	/* Small tables print with all comments */
	if ((unicode_conversion.count < 256) || (!option_condensed))
		print_indexed_table("from_unicode", &unicode_conversion,
							CANT_MAP_CHARACTER);
	else
		print_condensed_indexed_table("from_unicode", &unicode_conversion,
									  CANT_MAP_CHARACTER);

	return 0;
}


/*
 * Record one mapping from a character-set codepoint to a Unicode value.
 *
 * codepoint  source character; indexes codepoint_conversion
 * unicode    target value; indexes unicode_conversion
 * name       character name; truncated to fit the name field
 *
 * Updates the low/high/count statistics of both tables.  Duplicate
 * entries for a codepoint, and several codepoints mapping to the same
 * Unicode value, are reported on stderr.  When the codepoint entry has
 * been flagged not_defined, only the codepoint table is touched.
 */
void declare(unsigned short codepoint, unsigned short unicode, char *name)
{
	CONVERSION *from = &codepoint_conversion.table[codepoint];
	CONVERSION *to;

	if (codepoint < codepoint_conversion.low_point)
		codepoint_conversion.low_point = codepoint;
	if (codepoint > codepoint_conversion.high_point)
		codepoint_conversion.high_point = codepoint;

	if (from->exists++) {
		if (unicode != from->equivilant)
			ib_fprintf(ib_stderr,
					   "Error: duplicate unequal mappings for 0x%04x : 0x%04x and 0x%04x\n",
					   codepoint, unicode, from->equivilant);
		else
			ib_fprintf(ib_stderr,
					   "Warning: duplicate entries for codepoint 0x%04x to 0x%04x\n",
					   codepoint, unicode);
	}
	else
		codepoint_conversion.count++;

	from->codepoint = codepoint;
	copy_name(from->name, sizeof(from->name), name);

	if (from->not_defined)
		return;

	from->equivilant = unicode;

	if (unicode < unicode_conversion.low_point)
		unicode_conversion.low_point = unicode;
	/* The high-water mark belongs to the Unicode table, not the
	   codepoint table. */
	if (unicode > unicode_conversion.high_point)
		unicode_conversion.high_point = unicode;

	to = &unicode_conversion.table[unicode];
	if (!to->exists++) {
		/* Count distinct Unicode values so that main() can tell a small
		   reverse table from a large one. */
		unicode_conversion.count++;
		to->codepoint = unicode;
		if (unicode != UNICODE_REPLACEMENT_CHARACTER) {
			to->equivilant = codepoint;
			copy_name(to->name, sizeof(to->name), name);
		}
	}
	else if (unicode != UNICODE_REPLACEMENT_CHARACTER)
		ib_fprintf(ib_stderr,
				   "Warning: Multiple mappings to Unicode 0x%04x : 0x%04x and 0x%04x\n",
				   unicode, codepoint, to->equivilant);
}


/*
 * Print a flat 256-entry array named <name>_map covering table entries
 * 0..255.  Entries that were not declared exactly once, or that are
 * flagged not_defined, are printed as UNICODE_REPLACEMENT_CHARACTER.
 */
void print_direct_table(char *name, TABLE * table)
{
	int i;

	ib_printf("static CONST USHORT %s_map[256] = {\n", name);
	for (i = 0; i < 256; i++) {
		const CONVERSION *entry = &table->table[i];

		if (entry->exists != 1 || entry->not_defined)
			ib_printf("/* %02X */ UNICODE_REPLACEMENT_CHARACTER,\n", i);
		else
			ib_printf("/* %02X */ 0x%04X /* %45s */,\n", i,
					  entry->equivilant, entry->name);
	}
	ib_printf("};\n");
}


/*
 * Print a two-level lookup for the table, with a comment on every entry.
 *
 * <name>_mapping_array starts with a 256-entry page made up entirely of
 * the replacement value, followed by one 256-entry page for every upper
 * byte that has at least one declared entry.  <name>_map[256] gives, for
 * each upper byte, the offset of its page in the mapping array; upper
 * bytes with no entries keep offset 0 and so share the first page.
 *
 * replacement selects which symbolic name is printed for unmapped slots:
 * UNICODE_REPLACEMENT_CHARACTER or, for any other value,
 * CANT_MAP_CHARACTER.
 */
void print_indexed_table(char *name, TABLE * table,
						 unsigned short replacement)
{
	unsigned int index;
	unsigned int codepoint;
	unsigned short upper_byte[256];
	int i;

	ib_printf("static CONST USHORT FAR_VARIABLE %s_mapping_array[] = {\n",
			  name);

	/* Shared page used by every upper byte that has no mappings */
	for (index = 0; index < 256; index++)
		if (replacement == UNICODE_REPLACEMENT_CHARACTER)
			ib_printf
				("/* U+XX%02X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
				 index % 256, index);
		else
			ib_printf("/* U+XX%02X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
					  index % 256, index);

	memset(upper_byte, 0, sizeof(upper_byte));

	codepoint = 0;
	while (codepoint < 0xFFFF + 1) {
		if (!table->table[codepoint].exists) {
			codepoint++;
			continue;
		}

		/* First declared entry of a new page: remember where it starts */
		upper_byte[codepoint / 256] = index;

		/* Pad the slots of this page that precede the first entry */
		while ((index % 256) < (codepoint % 256)) {
			if (replacement == UNICODE_REPLACEMENT_CHARACTER)
				ib_printf
					("/* U+%04X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
					 (codepoint & 0xFF00) + (index % 256), index);
			else
				ib_printf("/* U+%04X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
						  (codepoint & 0xFF00) + (index % 256), index);
			index++;
		}

		/* Emit the rest of the page; index and codepoint advance together
		   until the page boundary is reached */
		do {
			if (!table->table[codepoint].exists) {
				if (replacement == UNICODE_REPLACEMENT_CHARACTER)
					ib_printf
						("/* U+%04X */\tUNICODE_REPLACEMENT_CHARACTER,\t/* %d */\n",
						 codepoint, index);
				else
					ib_printf
						("/* U+%04X */\tCANT_MAP_CHARACTER,\t/* %d */\n",
						 codepoint, index);
			}
			else
				ib_printf("/* U+%04X */\t0x%04X\t/* %45s */,/* %d */\n",
						  codepoint,
						  table->table[codepoint].equivilant,
						  table->table[codepoint].name, index);
			index++;
			codepoint++;
		} while (index % 256);
	}

	ib_printf("\t0 /* END OF MAP TABLE */\n");
	ib_printf("};\n");

	ib_printf("static CONST USHORT %s_map[256] = {\n", name);
	for (i = 0; i < 256; i++)
		ib_printf("/* U+%02X-- */\t%d,\n", i, upper_byte[i]);
	ib_printf("};\n");
}


/*
 * Print the same two-level lookup as print_indexed_table, but without
 * per-entry comments: values are written eight to a line and each page
 * is preceded by a comment giving its index range.  Unmapped slots are
 * written as the numeric replacement value (or "0" when it is zero).
 */
void print_condensed_indexed_table(char *name, TABLE * table,
								   unsigned short replacement)
{
	unsigned int index;
	unsigned int codepoint;
	unsigned short upper_byte[256];
	int i;

	ib_printf("static CONST USHORT FAR_VARIABLE %s_mapping_array[] = {\n",
			  name);

	/* Shared page used by every upper byte that has no mappings */
	ib_printf("\n");
	ib_printf("/* %5d to %5d */\n", 0, 255);
	for (index = 0; index < 256; index++) {
		if (replacement)
			ib_printf("0x%04X,", replacement);
		else
			ib_printf("0, ");
		if (index % 8 == 7)
			ib_printf("\n");
		else
			ib_printf(" ");
	}

	memset(upper_byte, 0, sizeof(upper_byte));

	codepoint = 0;
	while (codepoint < 0xFFFF + 1) {
		if (!table->table[codepoint].exists) {
			codepoint++;
			continue;
		}

		/* First declared entry of a new page: remember where it starts */
		upper_byte[codepoint / 256] = index;

		ib_printf("\n");
		ib_printf("/* %5d to %5d */\n", index, index + 255);

		/* Pad the slots of this page that precede the first entry */
		while ((index % 256) < (codepoint % 256)) {
			if (replacement)
				ib_printf("0x%04X,", replacement);
			else
				ib_printf("0, ");
			if (index % 8 == 7)
				ib_printf("\n");
			else
				ib_printf(" ");
			index++;
		}

		/* Emit the rest of the page up to the page boundary */
		do {
			if (!table->table[codepoint].exists) {
				if (replacement)
					ib_printf("0x%04X,", replacement);
				else
					ib_printf("0, ");
			}
			else
				ib_printf("0x%04X,", table->table[codepoint].equivilant);
			if (index % 8 == 7)
				ib_printf("\n");
			else
				ib_printf(" ");
			index++;
			codepoint++;
		} while (index % 256);
	}

	ib_printf("\t0 /* END OF MAP TABLE */\n");
	ib_printf("};\n");

	ib_printf("static CONST USHORT %s_map[256] = {\n", name);
	for (i = 0; i < 256; i++)
		ib_printf("%d,\n", upper_byte[i]);
	ib_printf("};\n");
}