44# ifndef IS_INVALID_CHAR
45# define IS_INVALID_CHAR(enc, ptr, n) (0)
48# define INVALID_LEAD_CASE(n, ptr, nextTokPtr) \
51 return XML_TOK_PARTIAL_CHAR; \
52 if (IS_INVALID_CHAR(enc, ptr, n)) { \
53 *(nextTokPtr) = (ptr); \
54 return XML_TOK_INVALID; \
59# define INVALID_CASES(ptr, nextTokPtr) \
60 INVALID_LEAD_CASE(2, ptr, nextTokPtr) \
61 INVALID_LEAD_CASE(3, ptr, nextTokPtr) \
62 INVALID_LEAD_CASE(4, ptr, nextTokPtr) \
66 *(nextTokPtr) = (ptr); \
67 return XML_TOK_INVALID;
69# define CHECK_NAME_CASE(n, enc, ptr, end, nextTokPtr) \
72 return XML_TOK_PARTIAL_CHAR; \
73 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NAME_CHAR(enc, ptr, n)) { \
75 return XML_TOK_INVALID; \
80# define CHECK_NAME_CASES(enc, ptr, end, nextTokPtr) \
82 if (! IS_NAME_CHAR_MINBPC(enc, ptr)) { \
84 return XML_TOK_INVALID; \
94 CHECK_NAME_CASE(2, enc, ptr, end, nextTokPtr) \
95 CHECK_NAME_CASE(3, enc, ptr, end, nextTokPtr) \
96 CHECK_NAME_CASE(4, enc, ptr, end, nextTokPtr)
98# define CHECK_NMSTRT_CASE(n, enc, ptr, end, nextTokPtr) \
100 if ((end) - (ptr) < (n)) \
101 return XML_TOK_PARTIAL_CHAR; \
102 if (IS_INVALID_CHAR(enc, ptr, n) || ! IS_NMSTRT_CHAR(enc, ptr, n)) { \
104 return XML_TOK_INVALID; \
109# define CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr) \
111 if (! IS_NMSTRT_CHAR_MINBPC(enc, ptr)) { \
113 return XML_TOK_INVALID; \
118 ptr += MINBPC(enc); \
120 CHECK_NMSTRT_CASE(2, enc, ptr, end, nextTokPtr) \
121 CHECK_NMSTRT_CASE(3, enc, ptr, end, nextTokPtr) \
122 CHECK_NMSTRT_CASE(4, enc, ptr, end, nextTokPtr)
/* PREFIX(ident) expands to ident unchanged. The function definitions and
   calls in this file wrap their names in it, e.g. PREFIX(scanComment). */
125# define PREFIX(ident) ident
/* HAS_CHARS(enc, ptr, end, count): non-zero when the distance from ptr to end
   is at least count * MINBPC(enc). MINBPC is defined outside this view --
   presumably the encoding's minimum bytes per character; TODO confirm. */
128# define HAS_CHARS(enc, ptr, end, count) \
129 ((end) - (ptr) >= ((count)*MINBPC(enc)))
/* HAS_CHAR(enc, ptr, end): shorthand for HAS_CHARS with a count of 1. It is
   the condition used by the scanning loops in this file. */
131# define HAS_CHAR(enc, ptr, end) HAS_CHARS(enc, ptr, end, 1)
133# define REQUIRE_CHARS(enc, ptr, end, count) \
135 if (! HAS_CHARS(enc, ptr, end, count)) { \
136 return XML_TOK_PARTIAL; \
/* REQUIRE_CHAR(enc, ptr, end): shorthand for REQUIRE_CHARS with a count of 1.
   REQUIRE_CHARS returns XML_TOK_PARTIAL from the enclosing function when
   HAS_CHARS fails, so this macro may only be used inside a function that
   returns a token code. */
140# define REQUIRE_CHAR(enc, ptr, end) REQUIRE_CHARS(enc, ptr, end, 1)
145PREFIX(scanComment)(
const ENCODING *enc,
const char *ptr,
const char *end,
146 const char **nextTokPtr) {
147 if (HAS_CHAR(enc, ptr, end)) {
153 while (HAS_CHAR(enc, ptr, end)) {
155 INVALID_CASES(ptr, nextTokPtr)
158 REQUIRE_CHAR(enc, ptr, end);
161 REQUIRE_CHAR(enc, ptr, end);
166 *nextTokPtr = ptr +
MINBPC(enc);
182PREFIX(scanDecl)(
const ENCODING *enc,
const char *ptr,
const char *end,
183 const char **nextTokPtr) {
184 REQUIRE_CHAR(enc, ptr, end);
187 return PREFIX(scanComment)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
189 *nextTokPtr = ptr +
MINBPC(enc);
199 while (HAS_CHAR(enc, ptr, end)) {
202 REQUIRE_CHARS(enc, ptr, end, 2);
231PREFIX(checkPiTarget)(
const ENCODING *enc,
const char *ptr,
const char *end,
236 if (end - ptr !=
MINBPC(enc) * 3)
276PREFIX(scanPi)(
const ENCODING *enc,
const char *ptr,
const char *end,
277 const char **nextTokPtr) {
279 const char *target = ptr;
280 REQUIRE_CHAR(enc, ptr, end);
282 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
287 while (HAS_CHAR(enc, ptr, end)) {
289 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
293 if (!
PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
298 while (HAS_CHAR(enc, ptr, end)) {
300 INVALID_CASES(ptr, nextTokPtr)
303 REQUIRE_CHAR(enc, ptr, end);
305 *nextTokPtr = ptr +
MINBPC(enc);
316 if (!
PREFIX(checkPiTarget)(enc, target, ptr, &tok)) {
321 REQUIRE_CHAR(enc, ptr, end);
323 *nextTokPtr = ptr +
MINBPC(enc);
336PREFIX(scanCdataSection)(
const ENCODING *enc,
const char *ptr,
const char *end,
337 const char **nextTokPtr) {
338 static const char CDATA_LSQB[]
343 REQUIRE_CHARS(enc, ptr, end, 6);
344 for (i = 0; i < 6; i++, ptr +=
MINBPC(enc)) {
355PREFIX(cdataSectionTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
356 const char **nextTokPtr) {
360 size_t n = end - ptr;
361 if (n & (
MINBPC(enc) - 1)) {
371 REQUIRE_CHAR(enc, ptr, end);
375 REQUIRE_CHAR(enc, ptr, end);
380 *nextTokPtr = ptr +
MINBPC(enc);
384 REQUIRE_CHAR(enc, ptr, end);
390 *nextTokPtr = ptr +
MINBPC(enc);
392 INVALID_CASES(ptr, nextTokPtr)
397 while (HAS_CHAR(enc, ptr, end)) {
399# define LEAD_CASE(n) \
401 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
403 return XML_TOK_DATA_CHARS; \
431PREFIX(scanEndTag)(
const ENCODING *enc,
const char *ptr,
const char *end,
432 const char **nextTokPtr) {
433 REQUIRE_CHAR(enc, ptr, end);
435 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
440 while (HAS_CHAR(enc, ptr, end)) {
442 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
446 for (ptr +=
MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr +=
MINBPC(enc)) {
453 *nextTokPtr = ptr +
MINBPC(enc);
469 *nextTokPtr = ptr +
MINBPC(enc);
482PREFIX(scanHexCharRef)(
const ENCODING *enc,
const char *ptr,
const char *end,
483 const char **nextTokPtr) {
484 if (HAS_CHAR(enc, ptr, end)) {
493 for (ptr +=
MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr +=
MINBPC(enc)) {
499 *nextTokPtr = ptr +
MINBPC(enc);
513PREFIX(scanCharRef)(
const ENCODING *enc,
const char *ptr,
const char *end,
514 const char **nextTokPtr) {
515 if (HAS_CHAR(enc, ptr, end)) {
517 return PREFIX(scanHexCharRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
525 for (ptr +=
MINBPC(enc); HAS_CHAR(enc, ptr, end); ptr +=
MINBPC(enc)) {
530 *nextTokPtr = ptr +
MINBPC(enc);
544PREFIX(scanRef)(
const ENCODING *enc,
const char *ptr,
const char *end,
545 const char **nextTokPtr) {
546 REQUIRE_CHAR(enc, ptr, end);
548 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
550 return PREFIX(scanCharRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
555 while (HAS_CHAR(enc, ptr, end)) {
557 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
559 *nextTokPtr = ptr +
MINBPC(enc);
572PREFIX(scanAtts)(
const ENCODING *enc,
const char *ptr,
const char *end,
573 const char **nextTokPtr) {
577 while (HAS_CHAR(enc, ptr, end)) {
579 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
588 REQUIRE_CHAR(enc, ptr, end);
590 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
604 REQUIRE_CHAR(enc, ptr, end);
626 REQUIRE_CHAR(enc, ptr, end);
644 REQUIRE_CHAR(enc, ptr, end);
649 INVALID_CASES(ptr, nextTokPtr)
651 int tok =
PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, &ptr);
668 REQUIRE_CHAR(enc, ptr, end);
685 REQUIRE_CHAR(enc, ptr, end);
687 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
694 *nextTokPtr = ptr +
MINBPC(enc);
699 REQUIRE_CHAR(enc, ptr, end);
704 *nextTokPtr = ptr +
MINBPC(enc);
725PREFIX(scanLt)(
const ENCODING *enc,
const char *ptr,
const char *end,
726 const char **nextTokPtr) {
730 REQUIRE_CHAR(enc, ptr, end);
732 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
735 REQUIRE_CHAR(enc, ptr, end);
738 return PREFIX(scanComment)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
740 return PREFIX(scanCdataSection)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
745 return PREFIX(scanPi)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
747 return PREFIX(scanEndTag)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
756 while (HAS_CHAR(enc, ptr, end)) {
758 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
767 REQUIRE_CHAR(enc, ptr, end);
769 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
780 while (HAS_CHAR(enc, ptr, end)) {
782 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
796 return PREFIX(scanAtts)(enc, ptr, end, nextTokPtr);
802 *nextTokPtr = ptr +
MINBPC(enc);
807 REQUIRE_CHAR(enc, ptr, end);
812 *nextTokPtr = ptr +
MINBPC(enc);
823PREFIX(contentTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
824 const char **nextTokPtr) {
828 size_t n = end - ptr;
829 if (n & (
MINBPC(enc) - 1)) {
838 return PREFIX(scanLt)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
840 return PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
843 if (! HAS_CHAR(enc, ptr, end))
850 *nextTokPtr = ptr +
MINBPC(enc);
854 if (! HAS_CHAR(enc, ptr, end))
859 if (! HAS_CHAR(enc, ptr, end))
867 INVALID_CASES(ptr, nextTokPtr)
872 while (HAS_CHAR(enc, ptr, end)) {
874# define LEAD_CASE(n) \
876 if (end - ptr < n || IS_INVALID_CHAR(enc, ptr, n)) { \
878 return XML_TOK_DATA_CHARS; \
887 if (HAS_CHARS(enc, ptr, end, 2)) {
892 if (HAS_CHARS(enc, ptr, end, 3)) {
897 *nextTokPtr = ptr + 2 *
MINBPC(enc);
923PREFIX(scanPercent)(
const ENCODING *enc,
const char *ptr,
const char *end,
924 const char **nextTokPtr) {
925 REQUIRE_CHAR(enc, ptr, end);
927 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
938 while (HAS_CHAR(enc, ptr, end)) {
940 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
942 *nextTokPtr = ptr +
MINBPC(enc);
953PREFIX(scanPoundName)(
const ENCODING *enc,
const char *ptr,
const char *end,
954 const char **nextTokPtr) {
955 REQUIRE_CHAR(enc, ptr, end);
957 CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
962 while (HAS_CHAR(enc, ptr, end)) {
964 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
983PREFIX(scanLit)(
int open,
const ENCODING *enc,
const char *ptr,
const char *end,
984 const char **nextTokPtr) {
985 while (HAS_CHAR(enc, ptr, end)) {
988 INVALID_CASES(ptr, nextTokPtr)
994 if (! HAS_CHAR(enc, ptr, end))
1017PREFIX(prologTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
1018 const char **nextTokPtr) {
1023 size_t n = end - ptr;
1024 if (n & (
MINBPC(enc) - 1)) {
1038 REQUIRE_CHAR(enc, ptr, end);
1041 return PREFIX(scanDecl)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1043 return PREFIX(scanPi)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1050 *nextTokPtr = ptr -
MINBPC(enc);
1057 if (ptr +
MINBPC(enc) == end) {
1067 if (! HAS_CHAR(enc, ptr, end))
1075 if (ptr +
MINBPC(enc) != end)
1086 return PREFIX(scanPercent)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1088 *nextTokPtr = ptr +
MINBPC(enc);
1091 *nextTokPtr = ptr +
MINBPC(enc);
1095 if (! HAS_CHAR(enc, ptr, end))
1098 REQUIRE_CHARS(enc, ptr, end, 2);
1100 *nextTokPtr = ptr + 2 *
MINBPC(enc);
1107 *nextTokPtr = ptr +
MINBPC(enc);
1111 if (! HAS_CHAR(enc, ptr, end))
1115 *nextTokPtr = ptr +
MINBPC(enc);
1118 *nextTokPtr = ptr +
MINBPC(enc);
1121 *nextTokPtr = ptr +
MINBPC(enc);
1136 *nextTokPtr = ptr +
MINBPC(enc);
1139 *nextTokPtr = ptr +
MINBPC(enc);
1142 return PREFIX(scanPoundName)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1143# define LEAD_CASE(n) \
1145 if (end - ptr < n) \
1146 return XML_TOK_PARTIAL_CHAR; \
1147 if (IS_INVALID_CHAR(enc, ptr, n)) { \
1148 *nextTokPtr = ptr; \
1149 return XML_TOK_INVALID; \
1151 if (IS_NMSTRT_CHAR(enc, ptr, n)) { \
1153 tok = XML_TOK_NAME; \
1156 if (IS_NAME_CHAR(enc, ptr, n)) { \
1158 tok = XML_TOK_NMTOKEN; \
1161 *nextTokPtr = ptr; \
1162 return XML_TOK_INVALID;
1197 while (HAS_CHAR(enc, ptr, end)) {
1199 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1216 REQUIRE_CHAR(enc, ptr, end);
1219 CHECK_NAME_CASES(enc, ptr, end, nextTokPtr)
1236 *nextTokPtr = ptr +
MINBPC(enc);
1243 *nextTokPtr = ptr +
MINBPC(enc);
1250 *nextTokPtr = ptr +
MINBPC(enc);
1261PREFIX(attributeValueTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
1262 const char **nextTokPtr) {
1266 else if (! HAS_CHAR(enc, ptr, end)) {
1275 while (HAS_CHAR(enc, ptr, end)) {
1277# define LEAD_CASE(n) \
1287 return PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1296 *nextTokPtr = ptr +
MINBPC(enc);
1304 if (! HAS_CHAR(enc, ptr, end))
1315 *nextTokPtr = ptr +
MINBPC(enc);
1330PREFIX(entityValueTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
1331 const char **nextTokPtr) {
1335 else if (! HAS_CHAR(enc, ptr, end)) {
1344 while (HAS_CHAR(enc, ptr, end)) {
1346# define LEAD_CASE(n) \
1356 return PREFIX(scanRef)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1361 int tok =
PREFIX(scanPercent)(enc, ptr +
MINBPC(enc), end, nextTokPtr);
1368 *nextTokPtr = ptr +
MINBPC(enc);
1376 if (! HAS_CHAR(enc, ptr, end))
1397PREFIX(ignoreSectionTok)(
const ENCODING *enc,
const char *ptr,
const char *end,
1398 const char **nextTokPtr) {
1401 size_t n = end - ptr;
1402 if (n & (
MINBPC(enc) - 1)) {
1407 while (HAS_CHAR(enc, ptr, end)) {
1409 INVALID_CASES(ptr, nextTokPtr)
1412 REQUIRE_CHAR(enc, ptr, end);
1415 REQUIRE_CHAR(enc, ptr, end);
1424 REQUIRE_CHAR(enc, ptr, end);
1427 REQUIRE_CHAR(enc, ptr, end);
1432 return XML_TOK_IGNORE_SECT;
1449PREFIX(isPublicId)(
const ENCODING *enc,
const char *ptr,
const char *end,
1450 const char **badPtr) {
1453 for (; HAS_CHAR(enc, ptr, end); ptr +=
MINBPC(enc)) {
1509PREFIX(getAtts)(
const ENCODING *enc,
const char *ptr,
int attsMax,
1511 enum { other, inName, inValue } state = inName;
1518# define START_NAME \
1519 if (state == other) { \
1520 if (nAtts < attsMax) { \
1521 atts[nAtts].name = ptr; \
1522 atts[nAtts].normalized = 1; \
1526# define LEAD_CASE(n) \
1528 START_NAME ptr += (n - MINBPC(enc)); \
1541 if (state != inValue) {
1542 if (nAtts < attsMax)
1543 atts[nAtts].valuePtr = ptr +
MINBPC(enc);
1548 if (nAtts < attsMax)
1549 atts[nAtts].valueEnd = ptr;
1554 if (state != inValue) {
1555 if (nAtts < attsMax)
1556 atts[nAtts].valuePtr = ptr +
MINBPC(enc);
1561 if (nAtts < attsMax)
1562 atts[nAtts].valueEnd = ptr;
1567 if (nAtts < attsMax)
1568 atts[nAtts].normalized = 0;
1571 if (state == inName)
1573 else if (state == inValue && nAtts < attsMax && atts[nAtts].normalized
1574 && (ptr == atts[nAtts].valuePtr
1578 atts[nAtts].normalized = 0;
1584 if (state == inName)
1586 else if (state == inValue && nAtts < attsMax)
1587 atts[nAtts].normalized = 0;
1591 if (state != inValue)
1644 if (result >= 0x110000)
1652 if (result >= 0x110000)
1656 return checkCharRefNumber(result);
1660PREFIX(predefinedEntityName)(
const ENCODING *enc,
const char *ptr,
1663 switch ((end - ptr) /
MINBPC(enc)) {
1715 const char *end1,
const char *ptr2) {
1717 for (; *ptr2; ptr1 +=
MINBPC(enc), ptr2++) {
1718 if (end1 - ptr1 <
MINBPC(enc)) {
1729 return ptr1 == end1;
1734 const char *start = ptr;
1737# define LEAD_CASE(n) \
1757 return (
int)(ptr - start);
1778PREFIX(updatePosition)(
const ENCODING *enc,
const char *ptr,
const char *end,
1780 while (HAS_CHAR(enc, ptr, end)) {
1782# define LEAD_CASE(n) \
1785 pos->columnNumber++; \
1792 pos->columnNumber = 0;
1801 pos->columnNumber = 0;
1805 pos->columnNumber++;
/* Drop the helper macros so they are no longer defined past this point.
   INVALID_CASES and the CHECK_* macros are defined earlier in this file; the
   definition of MULTIBYTE_CASES is not visible in this view. */
1812# undef MULTIBYTE_CASES
1813# undef INVALID_CASES
1814# undef CHECK_NAME_CASE
1815# undef CHECK_NAME_CASES
1816# undef CHECK_NMSTRT_CASE
1817# undef CHECK_NMSTRT_CASES
/* Every macro below is defined with an empty replacement list, so each use
   expands to nothing.
   NOTE(review): these look like placeholder definitions. Names such as
   XML_TOK_DATA_CHARS are used as return values and IS_NAME_CHAR_MINBPC as a
   condition in the code above, so the real definitions presumably come from
   a header -- confirm before relying on these. */
#define CHAR_MATCHES(enc, p, c)
#define IS_NAME_CHAR_MINBPC(enc, p)
#define BYTE_TYPE(enc, p)
#define BYTE_TO_ASCII(enc, p)
#define IS_NMSTRT_CHAR_MINBPC(enc, p)
#define XML_TOK_CLOSE_PAREN_ASTERISK
#define XML_TOK_DECL_OPEN
#define XML_TOK_PREFIXED_NAME
#define XML_TOK_OPEN_PAREN
#define XML_TOK_OPEN_BRACKET
#define XML_TOK_CLOSE_PAREN_PLUS
#define XML_TOK_EMPTY_ELEMENT_NO_ATTS
#define XML_TOK_ENTITY_REF
#define XML_TOK_CLOSE_PAREN
#define XML_TOK_NAME_ASTERISK
#define XML_TOK_DATA_NEWLINE
#define XML_TOK_DECL_CLOSE
#define XML_TOK_NAME_PLUS
#define XML_TOK_NAME_QUESTION
#define XML_TOK_COND_SECT_CLOSE
#define XML_TOK_START_TAG_WITH_ATTS
#define XML_TOK_CDATA_SECT_CLOSE
#define XML_TOK_DATA_CHARS
#define XML_TOK_COND_SECT_OPEN
#define XML_TOK_POUND_NAME
#define XML_TOK_PARAM_ENTITY_REF
#define XML_TOK_CLOSE_BRACKET
#define XML_TOK_EMPTY_ELEMENT_WITH_ATTS
#define XML_TOK_TRAILING_RSQB
#define XML_TOK_ATTRIBUTE_VALUE_S
#define XML_TOK_START_TAG_NO_ATTS
#define XML_TOK_TRAILING_CR
#define XML_TOK_CDATA_SECT_OPEN
#define XML_TOK_INSTANCE_START
#define XML_TOK_CLOSE_PAREN_QUESTION