/*===========================================================================
  nametab.h
    This header contains ASCII and UTF-8 tables for qualified
    name checking + macros for comparing multibyte UTF-8 sequences:
    UTF8_GET_NAMING2 and UTF8_GET_NAMING3

    UTF8_GET_NAMING... and UTF-8 tables are stolen from EXPAT
    note: DO NOT increment parameters in these macro calls, i.e.
          ISMAPCH(whitespace, *c++) will return an invalid value

    memory usage:

    namingBitmap (16x80):               1280 bytes
    nmstrtPages & namePages (2x8x32):    512 bytes
    4 ascii tables (4x32):               128 bytes
    ---------------------------------------------------
    total                               1920 bytes

    TODO: fix ascii tables (separate to pages to save space)

    see parsifal.h for copyright info
===========================================================================*/
|
|
|
|
#ifndef NAMETAB__H
|
|
#define NAMETAB__H
|
|
|
|
#include "xmlcfg.h" /* for UINT32 */
|
|
|
|
/* ascii tables generated by GENMAPS.C */
|
|
/* Bitmap over the 256 byte values marking the ASCII characters
   A-Z, a-z, '_' and ':'.
   Judging by the values, character c is a member when bit (c & 7) of
   byte (c >> 3) is set, e.g. ':' (0x3A) is bit 2 of byte 7.
   NOTE(review): going by the identifier these are the characters allowed
   to start a name; the lookup macro itself is not defined in this header. */
static const XMLCH nameStartAscii[32] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, /* 0x00-0x3F: ':'         */
    0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, /* 0x40-0x7F: A-Z _ a-z   */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0xBF: none        */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0xC0-0xFF: none        */
}; /* ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_: */
|
|
|
|
/* Bitmap over the 256 byte values marking the ASCII characters
   A-Z, a-z, '_', ':', 0-9, '.' and '-'.
   Judging by the values, character c is a member when bit (c & 7) of
   byte (c >> 3) is set, e.g. '-' (0x2D) is bit 5 of byte 5.
   NOTE(review): going by the identifier these are the characters allowed
   inside a name; the lookup macro itself is not defined in this header. */
static const XMLCH nameAscii[32] = {
    0x00, 0x00, 0x00, 0x00, 0x00, 0x60, 0xFF, 0x07, /* 0x00-0x3F: - . 0-9 :   */
    0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07, /* 0x40-0x7F: A-Z _ a-z   */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0xBF: none        */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0xC0-0xFF: none        */
}; /* ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_:0123456789.- */
|
|
|
|
/* Bitmap over the 256 byte values marking TAB (0x09), LF (0x0A),
   CR (0x0D) and SPACE (0x20).
   Judging by the values, character c is a member when bit (c & 7) of
   byte (c >> 3) is set: 0x26 in byte 1 covers 0x09, 0x0A and 0x0D,
   0x01 in byte 4 covers 0x20. */
static const XMLCH whitespace[32] = {
    0x00, 0x26, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, /* 0x00-0x3F: \t \n \r ' ' */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x7F: none         */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0xBF: none         */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0xC0-0xFF: none         */
};
|
|
|
|
/* Bitmap over the 256 byte values marking the control bytes 0x00-0x1F
   except the whitespace characters 0x09, 0x0A and 0x0D.
   Judging by the values, byte c is a member when bit (c & 7) of
   byte (c >> 3) is set: 0xD9 in byte 1 leaves bits 1, 2 and 5 clear,
   i.e. 0x09, 0x0A and 0x0D.
   NOTE(review): going by the identifier these are bytes treated as illegal
   in input; the code that consults the table is not in this header. */
static const XMLCH illByte[32] = {
    0xFF, 0xD9, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, /* 0x00-0x3F: 0x00-0x1F minus \t \n \r */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x7F: none */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x80-0xBF: none */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00  /* 0xC0-0xFF: none */
}; /* from \0x0 to \0x1f excluding whitespace chars 0x9, 0xA, 0xD */
|
|
|
|
/* Bitmap tests for multibyte UTF-8 sequences.
   c is indexed as c[0], c[1] (and c[2] for the 3-byte forms) by the
   UTF8_GET_NAMING macros and is evaluated more than once, so it must not
   have side effects (no *p++ style arguments).
   Each macro yields a nonzero value when the selected bit of namingBitmap
   is set and 0 otherwise:
     utf8_isName2 / utf8_isName3     - 2-/3-byte sequence via namePages
     utf8_isNmstrt2 / utf8_isNmstrt3 - 2-/3-byte sequence via nmstrtPages
   NOTE(review): going by the identifiers, namePages describes name
   characters and nmstrtPages name-start characters. */
#define utf8_isName2(c)   (UTF8_GET_NAMING2(namePages, (c)))
#define utf8_isName3(c)   (UTF8_GET_NAMING3(namePages, (c)))
#define utf8_isNmstrt2(c) (UTF8_GET_NAMING2(nmstrtPages, (c)))
#define utf8_isNmstrt3(c) (UTF8_GET_NAMING3(nmstrtPages, (c)))
|
|
|
|
/* the rest is stolen from EXPAT: */
|
|
|
|
/* A 2 byte UTF-8 representation splits the character's 11 bits
   between the bottom 5 and 6 bits of the bytes.
   We need 8 bits to index into pages, 3 bits to add to that index and
   5 bits to generate the mask:
     - bits 4..2 of byte[0] index pages[] (the remaining high bits of the
       8-bit page index are zero for an 11-bit character),
     - the page number found there, times 8, plus bits 1..0 of byte[0]
       and bit 5 of byte[1] selects one 32-bit word of namingBitmap,
     - the low 5 bits of byte[1] select the bit inside that word.
   Yields a nonzero value when the bit is set, 0 otherwise.
   byte is evaluated several times: do not pass an expression with side
   effects.
   The mask is built from an unsigned 1: shifting a signed 1 left by 31
   overflows int, which is undefined behaviour. */
#define UTF8_GET_NAMING2(pages, byte) \
    (namingBitmap[((pages)[(((byte)[0]) >> 2) & 7] << 3) \
                  + ((((byte)[0]) & 3) << 1) \
                  + ((((byte)[1]) >> 5) & 1)] \
     & (1u << (((byte)[1]) & 0x1F)))
|
|
|
|
/* A 3 byte UTF-8 representation splits the character's 16 bits
   between the bottom 4, 6 and 6 bits of the bytes.
   We need 8 bits to index into pages, 3 bits to add to that index and
   5 bits to generate the mask:
     - the low 4 bits of byte[0] and bits 5..2 of byte[1] form the 8-bit
       index into pages[],
     - the page number found there, times 8, plus bits 1..0 of byte[1]
       and bit 5 of byte[2] selects one 32-bit word of namingBitmap,
     - the low 5 bits of byte[2] select the bit inside that word.
   Yields a nonzero value when the bit is set, 0 otherwise.
   byte is evaluated several times: do not pass an expression with side
   effects.
   The mask is built from an unsigned 1: shifting a signed 1 left by 31
   overflows int, which is undefined behaviour. */
#define UTF8_GET_NAMING3(pages, byte) \
    (namingBitmap[((pages)[((((byte)[0]) & 0xF) << 4) \
                           + ((((byte)[1]) >> 2) & 0xF)] \
                   << 3) \
                  + ((((byte)[1]) & 3) << 1) \
                  + ((((byte)[2]) >> 5) & 1)] \
     & (1u << (((byte)[2]) & 0x1F)))
|
|
|
|
/* Bit sets consulted by UTF8_GET_NAMING2 / UTF8_GET_NAMING3.
   The table is made of 40 pages of 8 UINT32 words (256 bits) each; the
   values stored in nmstrtPages[] and namePages[] are page numbers into
   this table (the macros multiply them by 8 to reach the first word of a
   page).  Within a page, the word is chosen by 3 bits of the character
   and the bit inside the word by its low 5 bits.
   Page 0x00 has no bit set and page 0x01 has every bit set. */
static const UINT32 namingBitmap[] = {
    0x00000000, 0x00000000, 0x00000000, 0x00000000, /* page 0x00 */
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, /* page 0x01 */
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF,
    0x00000000, 0x04000000, 0x87FFFFFE, 0x07FFFFFE, /* page 0x02 */
    0x00000000, 0x00000000, 0xFF7FFFFF, 0xFF7FFFFF,
    0xFFFFFFFF, 0x7FF3FFFF, 0xFFFFFDFE, 0x7FFFFFFF, /* page 0x03 */
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE00F, 0xFC31FFFF,
    0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, /* page 0x04 */
    0xFFFFFFFF, 0xF80001FF, 0x00000003, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, /* page 0x05 */
    0xFFFFD740, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
    0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, /* page 0x06 */
    0xFFFF0003, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
    0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, /* page 0x07 */
    0x0000007F, 0x00000000, 0xFFFF0000, 0x000707FF,
    0x00000000, 0x07FFFFFE, 0x000007FE, 0xFFFE0000, /* page 0x08 */
    0xFFFFFFFF, 0x7CFFFFFF, 0x002F7FFF, 0x00000060,
    0xFFFFFFE0, 0x23FFFFFF, 0xFF000000, 0x00000003, /* page 0x09 */
    0xFFF99FE0, 0x03C5FDFF, 0xB0000000, 0x00030003,
    0xFFF987E0, 0x036DFDFF, 0x5E000000, 0x001C0000, /* page 0x0A */
    0xFFFBAFE0, 0x23EDFDFF, 0x00000000, 0x00000001,
    0xFFF99FE0, 0x23CDFDFF, 0xB0000000, 0x00000003, /* page 0x0B */
    0xD63DC7E0, 0x03BFC718, 0x00000000, 0x00000000,
    0xFFFDDFE0, 0x03EFFDFF, 0x00000000, 0x00000003, /* page 0x0C */
    0xFFFDDFE0, 0x03EFFDFF, 0x40000000, 0x00000003,
    0xFFFDDFE0, 0x03FFFDFF, 0x00000000, 0x00000003, /* page 0x0D */
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xFFFFFFFE, 0x000D7FFF, 0x0000003F, 0x00000000, /* page 0x0E */
    0xFEF02596, 0x200D6CAE, 0x0000001F, 0x00000000,
    0x00000000, 0x00000000, 0xFFFFFEFF, 0x000003FF, /* page 0x0F */
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, /* page 0x10 */
    0x00000000, 0xFFFFFFFF, 0xFFFF003F, 0x007FFFFF,
    0x0007DAED, 0x50000000, 0x82315001, 0x002C62AB, /* page 0x11 */
    0x40000000, 0xF580C900, 0x00000007, 0x02010800,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, /* page 0x12 */
    0x0FFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x03FFFFFF,
    0x3F3FFFFF, 0xFFFFFFFF, 0xAAFF3F3F, 0x3FFFFFFF, /* page 0x13 */
    0xFFFFFFFF, 0x5FDFFFFF, 0x0FCF1FDC, 0x1FDC1FFF,
    0x00000000, 0x00004C40, 0x00000000, 0x00000000, /* page 0x14 */
    0x00000007, 0x00000000, 0x00000000, 0x00000000,
    0x00000080, 0x000003FE, 0xFFFFFFFE, 0xFFFFFFFF, /* page 0x15 */
    0x001FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x07FFFFFF,
    0xFFFFFFE0, 0x00001FFF, 0x00000000, 0x00000000, /* page 0x16 */
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, /* page 0x17 */
    0xFFFFFFFF, 0x0000003F, 0x00000000, 0x00000000,
    0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, /* page 0x18 */
    0xFFFFFFFF, 0x0000000F, 0x00000000, 0x00000000,
    0x00000000, 0x07FF6000, 0x87FFFFFE, 0x07FFFFFE, /* page 0x19 */
    0x00000000, 0x00800000, 0xFF7FFFFF, 0xFF7FFFFF,
    0x00FFFFFF, 0x00000000, 0xFFFF0000, 0xFFFFFFFF, /* page 0x1A */
    0xFFFFFFFF, 0xF80001FF, 0x00030003, 0x00000000,
    0xFFFFFFFF, 0xFFFFFFFF, 0x0000003F, 0x00000003, /* page 0x1B */
    0xFFFFD7C0, 0xFFFFFFFB, 0x547F7FFF, 0x000FFFFD,
    0xFFFFDFFE, 0xFFFFFFFF, 0xDFFEFFFF, 0xFFFFFFFF, /* page 0x1C */
    0xFFFF007B, 0xFFFFFFFF, 0xFFFF199F, 0x033FCFFF,
    0x00000000, 0xFFFE0000, 0x027FFFFF, 0xFFFFFFFE, /* page 0x1D */
    0xFFFE007F, 0xBBFFFFFB, 0xFFFF0016, 0x000707FF,
    0x00000000, 0x07FFFFFE, 0x0007FFFF, 0xFFFF03FF, /* page 0x1E */
    0xFFFFFFFF, 0x7CFFFFFF, 0xFFEF7FFF, 0x03FF3DFF,
    0xFFFFFFEE, 0xF3FFFFFF, 0xFF1E3FFF, 0x0000FFCF, /* page 0x1F */
    0xFFF99FEE, 0xD3C5FDFF, 0xB080399F, 0x0003FFCF,
    0xFFF987E4, 0xD36DFDFF, 0x5E003987, 0x001FFFC0, /* page 0x20 */
    0xFFFBAFEE, 0xF3EDFDFF, 0x00003BBF, 0x0000FFC1,
    0xFFF99FEE, 0xF3CDFDFF, 0xB0C0398F, 0x0000FFC3, /* page 0x21 */
    0xD63DC7EC, 0xC3BFC718, 0x00803DC7, 0x0000FF80,
    0xFFFDDFEE, 0xC3EFFDFF, 0x00603DDF, 0x0000FFC3, /* page 0x22 */
    0xFFFDDFEC, 0xC3EFFDFF, 0x40603DDF, 0x0000FFC3,
    0xFFFDDFEC, 0xC3FFFDFF, 0x00803DCF, 0x0000FFC3, /* page 0x23 */
    0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xFFFFFFFE, 0x07FF7FFF, 0x03FF7FFF, 0x00000000, /* page 0x24 */
    0xFEF02596, 0x3BFF6CAE, 0x03FF3F5F, 0x00000000,
    0x03000000, 0xC2A003FF, 0xFFFFFEFF, 0xFFFE03FF, /* page 0x25 */
    0xFEBF0FDF, 0x02FE3FFF, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, /* page 0x26 */
    0x00000000, 0x00000000, 0x1FFF0000, 0x00000002,
    0x000000A0, 0x003EFFFE, 0xFFFFFFFE, 0xFFFFFFFF, /* page 0x27 */
    0x661FFFFF, 0xFFFFFFFE, 0xFFFFFFFF, 0x77FFFFFF,
};
|
|
/* Page map used with UTF8_GET_NAMING2 / UTF8_GET_NAMING3 (through
   utf8_isNmstrt2 / utf8_isNmstrt3).
   The 256 entries are indexed by the 8-bit page index the macros extract
   from the UTF-8 bytes; each entry is the number of a 256-bit page inside
   namingBitmap.  Page 0x00 is the all-clear page and page 0x01 the all-set
   page, so an entry of 0x00 means "no character of this block matches" and
   0x01 means "every character of this block matches".
   The trailing comment on each row gives the index range it covers. */
static const unsigned char nmstrtPages[] = {
    0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x00, /* 0x00-0x07 */
    0x00, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, /* 0x08-0x0F */
    0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, /* 0x18-0x1F */
    0x00, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2F */
    0x15, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, /* 0x48-0x4F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x50-0x57 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x58-0x5F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x60-0x67 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x68-0x6F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x70-0x77 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x78-0x7F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x80-0x87 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x88-0x8F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x90-0x97 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, /* 0x98-0x9F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA7 */
    0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, /* 0xA8-0xAF */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xB0-0xB7 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xB8-0xBF */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xC0-0xC7 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xC8-0xCF */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, /* 0xD0-0xD7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE8-0xEF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF0-0xF7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF8-0xFF */
};
|
|
/* Page map used with UTF8_GET_NAMING2 / UTF8_GET_NAMING3 (through
   utf8_isName2 / utf8_isName3).
   The 256 entries are indexed by the 8-bit page index the macros extract
   from the UTF-8 bytes; each entry is the number of a 256-bit page inside
   namingBitmap.  Page 0x00 is the all-clear page and page 0x01 the all-set
   page, so an entry of 0x00 means "no character of this block matches" and
   0x01 means "every character of this block matches".
   The trailing comment on each row gives the index range it covers. */
static const unsigned char namePages[] = {
    0x19, 0x03, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x00, /* 0x00-0x07 */
    0x00, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, /* 0x08-0x0F */
    0x10, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x10-0x17 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x12, 0x13, /* 0x18-0x1F */
    0x26, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x20-0x27 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x28-0x2F */
    0x27, 0x16, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x30-0x37 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x38-0x3F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0x40-0x47 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, /* 0x48-0x4F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x50-0x57 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x58-0x5F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x60-0x67 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x68-0x6F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x70-0x77 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x78-0x7F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x80-0x87 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x88-0x8F */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0x90-0x97 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x17, /* 0x98-0x9F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xA0-0xA7 */
    0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, /* 0xA8-0xAF */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xB0-0xB7 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xB8-0xBF */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xC0-0xC7 */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, /* 0xC8-0xCF */
    0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x18, /* 0xD0-0xD7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xD8-0xDF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE0-0xE7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xE8-0xEF */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF0-0xF7 */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0xF8-0xFF */
};
|
|
|
|
#endif /* NAMETAB__H */
|
|
|
|
|
|
|