rtgui_xml.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. #include <rtgui/rtgui_xml.h>
  2. #include <rtgui/rtgui_system.h>
  /*
   * Parser states.  rtgui_xml_parse() keeps the current one in
   * struct rtgui_xml::state and uses it to select rows of the transition table.
   */
  enum
  {
      STAT_START = 0,             /* initial state; also re-entered after each tag */
      STAT_TEXT,                  /* inside character data */
      STAT_START_TAG,             /* just after '<' */
      STAT_START_TAGNAME,         /* inside the name of a start tag */
      STAT_START_TAGNAME_END,     /* after a start tag name, before attributes or '>' */
      STAT_END_TAG,               /* just after "</" */
      STAT_END_TAGNAME,           /* inside the name of an end tag */
      STAT_END_TAGNAME_END,       /* after an end tag name, before '>' */
      STAT_EMPTY_TAG,             /* after the '/' of a self-closing tag */
      STAT_SPACE,                 /* run of whitespace */
      STAT_ATTR_NAME,             /* inside an attribute name */
      STAT_ATTR_NAME_END,         /* after an attribute name, before '=' */
      STAT_ATTR_VAL,              /* after '=', before the opening quote */
      STAT_ATTR_VAL2,             /* inside a quoted attribute value */
      STAT_ERROR                  /* error state; also terminates the transition table */
  };
  /*
   * Character classes a transition row can match against.  The set is kept
   * deliberately small; extend it only if the grammar needs more.
   */
  enum
  {
      CLASS_TYPE_NONE = 0,        /* matches nothing; used by the table terminator */
      CLASS_TYPE_LEFT_ANGLE,      /* '<' */
      CLASS_TYPE_SLASH,           /* '/' */
      CLASS_TYPE_RIGHT_ANGLE,     /* '>' */
      CLASS_TYPE_EQUALS,          /* '=' */
      CLASS_TYPE_QUOTE,           /* '"' (double quote only) */
      CLASS_TYPE_LETTERS,         /* a-z, A-Z, 0-9 and '.' */
      CLASS_TYPE_SPACE,           /* whitespace */
      CLASS_TYPE_ANY              /* any character, including all classes above */
  };
  37. /* xml state transition table */
  38. struct rtgui_xml_state
  39. {
  40. rt_uint8_t state;
  41. rt_uint8_t class_type;
  42. rt_uint8_t next_state;
  43. rt_uint8_t event;
  44. };
  45. /* Note: States must be grouped in match order AND grouped together! */
  46. static const struct rtgui_xml_state RTGUI_XML_STATES [] =
  47. {
  48. /* [0-2] starting state, which also serves as the default state in case
  49. of error */
  50. { STAT_START, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  51. { STAT_START, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_NONE },
  52. { STAT_START, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
  53. /* [3-5] space state handles linear white space */
  54. { STAT_SPACE, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  55. { STAT_SPACE, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
  56. { STAT_SPACE, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
  57. /* [6-8] handle start tag */
  58. { STAT_START_TAG, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_COPY },
  59. { STAT_START_TAG, CLASS_TYPE_SLASH, STAT_END_TAG, EVENT_COPY },
  60. /* below added since some individuals get a little carried away with
  61. spacing around tag names, e.g. < tag > */
  62. { STAT_START_TAG, CLASS_TYPE_SPACE, STAT_START_TAG, EVENT_NONE },
  63. /* [9-12] handle start tag name */
  64. { STAT_START_TAGNAME, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_NONE },
  65. { STAT_START_TAGNAME, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_START },
  66. /* below added for tags without any space between tag and ending
  67. slash, e.g., <br/> */
  68. { STAT_START_TAGNAME, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_END },
  69. { STAT_START_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
  70. /* [13-16] handle start tag name end */
  71. { STAT_START_TAGNAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  72. /* below added to handle additional space in between attribute value
  73. pairs in start tags, e.g., <tag attr="2" attr2="test" > */
  74. { STAT_START_TAGNAME_END, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_NONE },
  75. { STAT_START_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
  76. /* below supports tags that are self-closing, e.g., <br /> */
  77. { STAT_START_TAGNAME_END, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_COPY },
  78. /* [17] handle empty tags, e.g., <br /> */
  79. { STAT_EMPTY_TAG, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
  80. /* [18] handle end tag, e.g., <tag /> */
  81. { STAT_END_TAG, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
  82. /* [19-21] handle end tag name */
  83. { STAT_END_TAGNAME, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
  84. { STAT_END_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
  85. /* below adds support for spaces at the end of an end tag (before
  86. closing bracket) */
  87. { STAT_END_TAGNAME, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_END },
  88. /* [22] handle ending of end tag name */
  89. { STAT_END_TAGNAME_END, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_NONE },
  90. { STAT_END_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_NONE },
  91. /* [23-25] handle text */
  92. { STAT_TEXT, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  93. { STAT_TEXT, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
  94. { STAT_TEXT, CLASS_TYPE_ANY, STAT_TEXT, EVENT_NONE },
  95. /* [26-30] handle attribute names */
  96. { STAT_ATTR_NAME, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  97. /* below add support for space before the equals sign, e.g, <tag
  98. attr ="2"> */
  99. { STAT_ATTR_NAME, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NAME },
  100. { STAT_ATTR_NAME, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NAME },
  101. /* [31-33] attribute name end */
  102. { STAT_ATTR_NAME_END, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NONE },
  103. { STAT_ATTR_NAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  104. { STAT_ATTR_NAME_END, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NONE },
  105. /* [34-35] handle attribute values, initial quote and spaces */
  106. { STAT_ATTR_VAL, CLASS_TYPE_QUOTE, STAT_ATTR_VAL2, EVENT_NONE },
  107. /* below handles initial spaces before quoted attribute value */
  108. { STAT_ATTR_VAL, CLASS_TYPE_SPACE, STAT_ATTR_VAL, EVENT_NONE },
  109. /* [36-37] handle actual attribute values */
  110. { STAT_ATTR_VAL2, CLASS_TYPE_QUOTE, STAT_START_TAGNAME_END, EVENT_VAL },
  111. { STAT_ATTR_VAL2, CLASS_TYPE_LETTERS, STAT_ATTR_VAL2, EVENT_COPY },
  112. { STAT_ATTR_VAL2, CLASS_TYPE_SLASH, STAT_ATTR_VAL2, EVENT_NONE },
  113. /* End of table marker */
  114. { STAT_ERROR, CLASS_TYPE_NONE, STAT_ERROR, EVENT_NONE }
  115. };
  116. struct rtgui_xml
  117. {
  118. /* event handler */
  119. rtgui_xml_event_handler_t event_handler;
  120. void *user;
  121. char *buffer; /* xml buffer */
  122. rt_size_t buffer_size; /* buffer size */
  123. rt_size_t position; /* current position in buffer */
  124. rt_uint16_t state, event; /* current state and event */
  125. rt_bool_t copy; /* copy text into tmp buffer */
  126. rt_bool_t halt; /* halt parsing of document */
  127. };
  128. rtgui_xml_t *rtgui_xml_create(rt_size_t buffer_size, rtgui_xml_event_handler_t handler,
  129. void *user)
  130. {
  131. rtgui_xml_t *xml = (rtgui_xml_t *) rtgui_malloc(sizeof(struct rtgui_xml));
  132. rt_memset(xml, 0, sizeof(rtgui_xml_t));
  133. xml->event_handler = handler;
  134. xml->user = user;
  135. /* create buffer */
  136. xml->buffer_size = buffer_size;
  137. xml->buffer = (char *)rtgui_malloc(xml->buffer_size);
  138. return xml;
  139. }
  140. void rtgui_xml_destroy(rtgui_xml_t *xml)
  141. {
  142. if (xml)
  143. {
  144. rtgui_free(xml->buffer);
  145. rtgui_free(xml);
  146. }
  147. }
  148. const char *rtgui_xml_event_str(rt_uint8_t event)
  149. {
  150. switch (event)
  151. {
  152. case EVENT_START:
  153. return "start tag";
  154. case EVENT_END:
  155. return "end tag";
  156. case EVENT_TEXT:
  157. return "text";
  158. case EVENT_NAME:
  159. return "attr name";
  160. case EVENT_VAL:
  161. return "attr val";
  162. case EVENT_END_DOC:
  163. return "end document";
  164. default:
  165. break;
  166. }
  167. return "err";
  168. }
  169. int rtgui_xml_parse(rtgui_xml_t *xml, const char *buf, rt_size_t len)
  170. {
  171. int i, j, c, match;
  172. #define is_space(ch) \
  173. ((rt_uint32_t)(ch - 9) < 5u || ch == ' ')
  174. #define is_alpha(ch) \
  175. ((rt_uint32_t)((ch | 0x20) - 'a') < 26u)
  176. #define is_digit(ch) \
  177. ((rt_uint32_t)(ch - '0') < 10u)
  178. #define is_letters(ch) \
  179. (is_alpha(ch) || is_digit(ch) || (ch == '.'))
  180. for (i = 0; i < len; i++)
  181. {
  182. if (xml->halt) break;
  183. c = buf[i] & 0xff;
  184. /* search in state table */
  185. for (j = 0, match = 0; RTGUI_XML_STATES[j].state != STAT_ERROR; j++)
  186. {
  187. if (RTGUI_XML_STATES[j].state != xml->state)
  188. continue;
  189. switch (RTGUI_XML_STATES[j].class_type)
  190. {
  191. case CLASS_TYPE_LETTERS:
  192. match = is_letters(c);
  193. break;
  194. case CLASS_TYPE_LEFT_ANGLE:
  195. match = (c == '<');
  196. break;
  197. case CLASS_TYPE_SLASH:
  198. match = (c == '/');
  199. break;
  200. case CLASS_TYPE_RIGHT_ANGLE:
  201. match = (c == '>');
  202. break;
  203. case CLASS_TYPE_EQUALS:
  204. match = (c == '=');
  205. break;
  206. case CLASS_TYPE_QUOTE:
  207. match = (c == '"');
  208. break;
  209. case CLASS_TYPE_SPACE:
  210. match = is_space(c);
  211. break;
  212. case CLASS_TYPE_ANY:
  213. match = 1;
  214. break;
  215. default:
  216. break;
  217. }
  218. /* we matched a character class */
  219. if (match)
  220. {
  221. if (RTGUI_XML_STATES[j].event == EVENT_COPY)
  222. {
  223. xml->copy = RT_TRUE;
  224. }
  225. else if (RTGUI_XML_STATES[j].event != EVENT_NONE)
  226. {
  227. if (xml->copy == RT_TRUE)
  228. {
  229. /* basically we are guaranteed never to have an event of
  230. type EVENT_COPY or EVENT_NONE here. */
  231. xml->event = RTGUI_XML_STATES[j].event;
  232. xml->buffer[xml->position] = 0; /* make a string */
  233. if (!xml->event_handler(RTGUI_XML_STATES[j].event,
  234. xml->buffer, xml->position ,
  235. xml->user))
  236. {
  237. xml->halt = 1; /* stop parsing from here out */
  238. }
  239. xml->position = 0;
  240. xml->copy = RT_FALSE;
  241. }
  242. }
  243. if (xml->copy == RT_TRUE)
  244. {
  245. /* check to see if we have room; one less for trailing
  246. nul */
  247. if (xml->position < xml->buffer_size - 1)
  248. {
  249. xml->buffer[xml->position] = buf[i];
  250. xml->position++;
  251. }
  252. }
  253. xml->state = RTGUI_XML_STATES[j].next_state; /* change state */
  254. break; /* break out of loop though state search */
  255. }
  256. }
  257. }
  258. return !xml->halt;
  259. }