rtgui_xml.c 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. #include <rtgui/rtgui_xml.h>
  /*
   * Parser states. The tokenizer is always in exactly one of these; the
   * numeric values are stored in 8-bit fields of the transition table, so
   * they must stay small and stable.
   */
  enum {
      STAT_START             = 0,  /* initial/base state */
      STAT_TEXT              = 1,  /* inside character data */
      STAT_START_TAG         = 2,  /* just after '<' */
      STAT_START_TAGNAME     = 3,  /* inside the name of a start tag */
      STAT_START_TAGNAME_END = 4,  /* after the start tag name (attribute area) */
      STAT_END_TAG           = 5,  /* just after the '/' that follows '<' */
      STAT_END_TAGNAME       = 6,  /* inside the name of an end tag */
      STAT_END_TAGNAME_END   = 7,  /* after the end tag name, before '>' */
      STAT_EMPTY_TAG         = 8,  /* after the '/' of a self-closing tag */
      STAT_SPACE             = 9,  /* run of whitespace */
      STAT_ATTR_NAME         = 10, /* inside an attribute name */
      STAT_ATTR_NAME_END     = 11, /* after an attribute name, before '=' */
      STAT_ATTR_VAL          = 12, /* after '=', waiting for the opening quote */
      STAT_ATTR_VAL2         = 13, /* inside a quoted attribute value */
      STAT_ERROR             = 14  /* error state; also marks the end of the table */
  };
  /*
   * Character classes a transition can be keyed on. The set is deliberately
   * small; extend it only if the grammar really needs another class.
   */
  enum {
      CLASS_TYPE_NONE        = 0, /* matches nothing (used by the table sentinel) */
      CLASS_TYPE_LEFT_ANGLE  = 1, /* '<' */
      CLASS_TYPE_SLASH       = 2, /* '/' */
      CLASS_TYPE_RIGHT_ANGLE = 3, /* '>' */
      CLASS_TYPE_EQUALS      = 4, /* '=' */
      CLASS_TYPE_QUOTE       = 5, /* '"' */
      CLASS_TYPE_LETTERS     = 6, /* a-z, A-Z, 0-9 and '.' */
      CLASS_TYPE_SPACE       = 7, /* whitespace */
      CLASS_TYPE_ANY         = 8  /* any character, including all classes above */
  };
  34. /* xml state transition table */
  35. struct rtgui_xml_state
  36. {
  37. rt_uint8_t state;
  38. rt_uint8_t class_type;
  39. rt_uint8_t next_state;
  40. rt_uint8_t event;
  41. };
  42. /* Note: States must be grouped in match order AND grouped together! */
  43. static const struct rtgui_xml_state RTGUI_XML_STATES [] = {
  44. /* [0-2] starting state, which also serves as the default state in case
  45. of error */
  46. { STAT_START, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  47. { STAT_START, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_NONE },
  48. { STAT_START, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
  49. /* [3-5] space state handles linear white space */
  50. { STAT_SPACE, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  51. { STAT_SPACE, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
  52. { STAT_SPACE, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
  53. /* [6-8] handle start tag */
  54. { STAT_START_TAG, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_COPY },
  55. { STAT_START_TAG, CLASS_TYPE_SLASH, STAT_END_TAG, EVENT_COPY },
  56. /* below added since some individuals get a little carried away with
  57. spacing around tag names, e.g. < tag > */
  58. { STAT_START_TAG, CLASS_TYPE_SPACE, STAT_START_TAG, EVENT_NONE },
  59. /* [9-12] handle start tag name */
  60. { STAT_START_TAGNAME, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_NONE },
  61. { STAT_START_TAGNAME, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_START },
  62. /* below added for tags without any space between tag and ending
  63. slash, e.g., <br/> */
  64. { STAT_START_TAGNAME, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_END },
  65. { STAT_START_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
  66. /* [13-16] handle start tag name end */
  67. { STAT_START_TAGNAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  68. /* below added to handle additional space in between attribute value
  69. pairs in start tags, e.g., <tag attr="2" attr2="test" > */
  70. { STAT_START_TAGNAME_END, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_NONE },
  71. { STAT_START_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
  72. /* below supports tags that are self-closing, e.g., <br /> */
  73. { STAT_START_TAGNAME_END, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_COPY },
  74. /* [17] handle empty tags, e.g., <br /> */
  75. { STAT_EMPTY_TAG, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
  76. /* [18] handle end tag, e.g., <tag /> */
  77. { STAT_END_TAG, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
  78. /* [19-21] handle end tag name */
  79. { STAT_END_TAGNAME, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
  80. { STAT_END_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
  81. /* below adds support for spaces at the end of an end tag (before
  82. closing bracket) */
  83. { STAT_END_TAGNAME, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_END },
  84. /* [22] handle ending of end tag name */
  85. { STAT_END_TAGNAME_END, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_NONE },
  86. { STAT_END_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE,STAT_START, EVENT_NONE },
  87. /* [23-25] handle text */
  88. { STAT_TEXT, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  89. { STAT_TEXT, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
  90. { STAT_TEXT, CLASS_TYPE_ANY, STAT_TEXT, EVENT_NONE },
  91. /* [26-30] handle attribute names */
  92. { STAT_ATTR_NAME, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  93. /* below add support for space before the equals sign, e.g, <tag
  94. attr ="2"> */
  95. { STAT_ATTR_NAME, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NAME },
  96. { STAT_ATTR_NAME, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NAME },
  97. /* [31-33] attribute name end */
  98. { STAT_ATTR_NAME_END, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NONE },
  99. { STAT_ATTR_NAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  100. { STAT_ATTR_NAME_END, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NONE },
  101. /* [34-35] handle attribute values, initial quote and spaces */
  102. { STAT_ATTR_VAL, CLASS_TYPE_QUOTE, STAT_ATTR_VAL2, EVENT_NONE },
  103. /* below handles initial spaces before quoted attribute value */
  104. { STAT_ATTR_VAL, CLASS_TYPE_SPACE, STAT_ATTR_VAL, EVENT_NONE },
  105. /* [36-37] handle actual attribute values */
  106. { STAT_ATTR_VAL2, CLASS_TYPE_QUOTE, STAT_START_TAGNAME_END, EVENT_VAL },
  107. { STAT_ATTR_VAL2, CLASS_TYPE_LETTERS, STAT_ATTR_VAL2, EVENT_COPY },
  108. { STAT_ATTR_VAL2, CLASS_TYPE_SLASH, STAT_ATTR_VAL2, EVENT_NONE },
  109. /* End of table marker */
  110. { STAT_ERROR, CLASS_TYPE_NONE, STAT_ERROR, EVENT_NONE }
  111. };
  112. struct rtgui_xml
  113. {
  114. /* event handler */
  115. rtgui_xml_event_handler_t event_handler;
  116. void* user;
  117. char* buffer; /* xml buffer */
  118. rt_size_t buffer_size; /* buffer size */
  119. rt_size_t position; /* current position in buffer */
  120. rt_uint16_t state, event; /* current state and event */
  121. rt_bool_t copy; /* copy text into tmp buffer */
  122. rt_bool_t halt; /* halt parsing of document */
  123. };
  124. rtgui_xml_t* rtgui_xml_create(rt_size_t buffer_size, rtgui_xml_event_handler_t handler,
  125. void* user)
  126. {
  127. rtgui_xml_t* xml = (rtgui_xml_t*) rtgui_malloc(sizeof(struct rtgui_xml));
  128. rt_memset(xml, 0, sizeof(rtgui_xml_t));
  129. xml->event_handler = handler;
  130. xml->user = user;
  131. /* create buffer */
  132. xml->buffer_size = buffer_size;
  133. xml->buffer = (char*)rtgui_malloc(xml->buffer_size);
  134. return xml;
  135. }
  136. void rtgui_xml_destroy(rtgui_xml_t* xml)
  137. {
  138. if(xml)
  139. {
  140. rtgui_free(xml->buffer);
  141. rtgui_free(xml);
  142. }
  143. }
  144. const char* rtgui_xml_event_str(rt_uint8_t event)
  145. {
  146. switch(event)
  147. {
  148. case EVENT_START:
  149. return "start tag";
  150. break;
  151. case EVENT_END:
  152. return "end tag";
  153. break;
  154. case EVENT_TEXT:
  155. return "text";
  156. break;
  157. case EVENT_NAME:
  158. return "attr name";
  159. break;
  160. case EVENT_VAL:
  161. return "attr val";
  162. break;
  163. case EVENT_END_DOC:
  164. return "end document";
  165. break;
  166. default:
  167. break;
  168. }
  169. return "err";
  170. }
  171. int rtgui_xml_parse(rtgui_xml_t* xml, const char* buf, rt_size_t len)
  172. {
  173. int i, j, c, match;
  174. #define is_space(ch) \
  175. ((rt_uint32_t)(ch - 9) < 5u || ch == ' ')
  176. #define is_alpha(ch) \
  177. ((rt_uint32_t)((ch | 0x20) - 'a') < 26u)
  178. #define is_digit(ch) \
  179. ((rt_uint32_t)(ch - '0') < 10u)
  180. #define is_letters(ch) \
  181. (is_alpha(ch) || is_digit(ch) || (ch == '.'))
  182. for(i=0; i<len; i++)
  183. {
  184. if(xml->halt) break;
  185. c = buf[i] & 0xff;
  186. /* search in state table */
  187. for(j=0, match = 0; RTGUI_XML_STATES[j].state != STAT_ERROR; j++)
  188. {
  189. if(RTGUI_XML_STATES[j].state != xml->state)
  190. continue;
  191. switch(RTGUI_XML_STATES[j].class_type)
  192. {
  193. case CLASS_TYPE_LETTERS:
  194. match = is_letters(c);
  195. break;
  196. case CLASS_TYPE_LEFT_ANGLE:
  197. match = (c == '<');
  198. break;
  199. case CLASS_TYPE_SLASH:
  200. match = (c == '/');
  201. break;
  202. case CLASS_TYPE_RIGHT_ANGLE:
  203. match = (c == '>');
  204. break;
  205. case CLASS_TYPE_EQUALS:
  206. match = (c == '=');
  207. break;
  208. case CLASS_TYPE_QUOTE:
  209. match = (c == '"');
  210. break;
  211. case CLASS_TYPE_SPACE:
  212. match = is_space(c);
  213. break;
  214. case CLASS_TYPE_ANY:
  215. match = 1;
  216. break;
  217. default:
  218. break;
  219. }
  220. /* we matched a character class */
  221. if(match)
  222. {
  223. if(RTGUI_XML_STATES[j].event == EVENT_COPY)
  224. {
  225. xml->copy = RT_TRUE;
  226. }
  227. else if(RTGUI_XML_STATES[j].event != EVENT_NONE)
  228. {
  229. if(xml->copy == RT_TRUE)
  230. {
  231. /* basically we are guaranteed never to have an event of
  232. type EVENT_COPY or EVENT_NONE here. */
  233. xml->event = RTGUI_XML_STATES[j].event;
  234. xml->buffer[xml->position] = 0; /* make a string */
  235. if(!xml->event_handler(RTGUI_XML_STATES[j].event,
  236. xml->buffer, xml->position ,
  237. xml->user))
  238. {
  239. xml->halt = 1; /* stop parsing from here out */
  240. }
  241. xml->position = 0;
  242. xml->copy = RT_FALSE;
  243. }
  244. }
  245. if(xml->copy == RT_TRUE)
  246. {
  247. /* check to see if we have room; one less for trailing
  248. nul */
  249. if(xml->position < xml->buffer_size-1)
  250. {
  251. xml->buffer[xml->position] = buf[i];
  252. xml->position++;
  253. }
  254. }
  255. xml->state = RTGUI_XML_STATES[j].next_state; /* change state */
  256. break; /* break out of loop though state search */
  257. }
  258. }
  259. }
  260. return !xml->halt;
  261. }