rtgui_xml.c 10.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. #include <rtgui/rtgui_xml.h>
  2. #include <rtgui/rtgui_system.h>
  /*
   * Parser states.  Exactly one of these is the current state at any moment;
   * the transition table is keyed on these values.
   */
  enum
  {
      /* initial state; numbered 0, so a zero-filled parser starts here */
      STAT_START = 0,
      /* inside character data */
      STAT_TEXT,
      /* a '<' has just been seen */
      STAT_START_TAG,
      /* inside the name of a start tag */
      STAT_START_TAGNAME,
      /* after the name of a start tag (between attributes) */
      STAT_START_TAGNAME_END,
      /* "</" has been seen */
      STAT_END_TAG,
      /* inside the name of an end tag */
      STAT_END_TAGNAME,
      /* after the name of an end tag, before '>' */
      STAT_END_TAGNAME_END,
      /* the '/' of a self-closing tag has been seen */
      STAT_EMPTY_TAG,
      /* run of linear whitespace */
      STAT_SPACE,
      /* inside an attribute name */
      STAT_ATTR_NAME,
      /* after an attribute name, before '=' */
      STAT_ATTR_NAME_END,
      /* after '=', before the opening quote */
      STAT_ATTR_VAL,
      /* inside a quoted attribute value */
      STAT_ATTR_VAL2,
      /* error state; also marks the last row of the transition table */
      STAT_ERROR
  };
  /*
   * Character classes an input character is tested against.  The set is
   * deliberately small; extend it only if the grammar really needs it.
   */
  enum
  {
      /* matches nothing; used by the end-of-table row */
      CLASS_TYPE_NONE = 0,
      /* '<' */
      CLASS_TYPE_LEFT_ANGLE,
      /* '/' */
      CLASS_TYPE_SLASH,
      /* '>' */
      CLASS_TYPE_RIGHT_ANGLE,
      /* '=' */
      CLASS_TYPE_EQUALS,
      /* '"' (double quote only) */
      CLASS_TYPE_QUOTE,
      /* a-z, A-Z, 0-9 and '.', as classified by rtgui_xml_parse() */
      CLASS_TYPE_LETTERS,
      /* whitespace */
      CLASS_TYPE_SPACE,
      /* matches every character, including all of the classes above */
      CLASS_TYPE_ANY
  };
  35. /* xml state transition table */
  36. struct rtgui_xml_state
  37. {
  38. rt_uint8_t state;
  39. rt_uint8_t class_type;
  40. rt_uint8_t next_state;
  41. rt_uint8_t event;
  42. };
  43. /* Note: States must be grouped in match order AND grouped together! */
  44. static const struct rtgui_xml_state RTGUI_XML_STATES [] = {
  45. /* [0-2] starting state, which also serves as the default state in case
  46. of error */
  47. { STAT_START, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  48. { STAT_START, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_NONE },
  49. { STAT_START, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
  50. /* [3-5] space state handles linear white space */
  51. { STAT_SPACE, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  52. { STAT_SPACE, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
  53. { STAT_SPACE, CLASS_TYPE_ANY, STAT_TEXT, EVENT_COPY },
  54. /* [6-8] handle start tag */
  55. { STAT_START_TAG, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_COPY },
  56. { STAT_START_TAG, CLASS_TYPE_SLASH, STAT_END_TAG, EVENT_COPY },
  57. /* below added since some individuals get a little carried away with
  58. spacing around tag names, e.g. < tag > */
  59. { STAT_START_TAG, CLASS_TYPE_SPACE, STAT_START_TAG, EVENT_NONE },
  60. /* [9-12] handle start tag name */
  61. { STAT_START_TAGNAME, CLASS_TYPE_LETTERS, STAT_START_TAGNAME, EVENT_NONE },
  62. { STAT_START_TAGNAME, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_START },
  63. /* below added for tags without any space between tag and ending
  64. slash, e.g., <br/> */
  65. { STAT_START_TAGNAME, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_END },
  66. { STAT_START_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
  67. /* [13-16] handle start tag name end */
  68. { STAT_START_TAGNAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  69. /* below added to handle additional space in between attribute value
  70. pairs in start tags, e.g., <tag attr="2" attr2="test" > */
  71. { STAT_START_TAGNAME_END, CLASS_TYPE_SPACE, STAT_START_TAGNAME_END, EVENT_NONE },
  72. { STAT_START_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_START },
  73. /* below supports tags that are self-closing, e.g., <br /> */
  74. { STAT_START_TAGNAME_END, CLASS_TYPE_SLASH, STAT_EMPTY_TAG, EVENT_COPY },
  75. /* [17] handle empty tags, e.g., <br /> */
  76. { STAT_EMPTY_TAG, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
  77. /* [18] handle end tag, e.g., <tag /> */
  78. { STAT_END_TAG, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
  79. /* [19-21] handle end tag name */
  80. { STAT_END_TAGNAME, CLASS_TYPE_LETTERS, STAT_END_TAGNAME, EVENT_NONE },
  81. { STAT_END_TAGNAME, CLASS_TYPE_RIGHT_ANGLE, STAT_START, EVENT_END },
  82. /* below adds support for spaces at the end of an end tag (before
  83. closing bracket) */
  84. { STAT_END_TAGNAME, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_END },
  85. /* [22] handle ending of end tag name */
  86. { STAT_END_TAGNAME_END, CLASS_TYPE_SPACE, STAT_END_TAGNAME_END, EVENT_NONE },
  87. { STAT_END_TAGNAME_END, CLASS_TYPE_RIGHT_ANGLE,STAT_START, EVENT_NONE },
  88. /* [23-25] handle text */
  89. { STAT_TEXT, CLASS_TYPE_SPACE, STAT_SPACE, EVENT_NONE },
  90. { STAT_TEXT, CLASS_TYPE_LEFT_ANGLE, STAT_START_TAG, EVENT_TEXT },
  91. { STAT_TEXT, CLASS_TYPE_ANY, STAT_TEXT, EVENT_NONE },
  92. /* [26-30] handle attribute names */
  93. { STAT_ATTR_NAME, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  94. /* below add support for space before the equals sign, e.g, <tag
  95. attr ="2"> */
  96. { STAT_ATTR_NAME, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NAME },
  97. { STAT_ATTR_NAME, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NAME },
  98. /* [31-33] attribute name end */
  99. { STAT_ATTR_NAME_END, CLASS_TYPE_SPACE, STAT_ATTR_NAME_END, EVENT_NONE },
  100. { STAT_ATTR_NAME_END, CLASS_TYPE_LETTERS, STAT_ATTR_NAME, EVENT_COPY },
  101. { STAT_ATTR_NAME_END, CLASS_TYPE_EQUALS, STAT_ATTR_VAL, EVENT_NONE },
  102. /* [34-35] handle attribute values, initial quote and spaces */
  103. { STAT_ATTR_VAL, CLASS_TYPE_QUOTE, STAT_ATTR_VAL2, EVENT_NONE },
  104. /* below handles initial spaces before quoted attribute value */
  105. { STAT_ATTR_VAL, CLASS_TYPE_SPACE, STAT_ATTR_VAL, EVENT_NONE },
  106. /* [36-37] handle actual attribute values */
  107. { STAT_ATTR_VAL2, CLASS_TYPE_QUOTE, STAT_START_TAGNAME_END, EVENT_VAL },
  108. { STAT_ATTR_VAL2, CLASS_TYPE_LETTERS, STAT_ATTR_VAL2, EVENT_COPY },
  109. { STAT_ATTR_VAL2, CLASS_TYPE_SLASH, STAT_ATTR_VAL2, EVENT_NONE },
  110. /* End of table marker */
  111. { STAT_ERROR, CLASS_TYPE_NONE, STAT_ERROR, EVENT_NONE }
  112. };
  113. struct rtgui_xml
  114. {
  115. /* event handler */
  116. rtgui_xml_event_handler_t event_handler;
  117. void* user;
  118. char* buffer; /* xml buffer */
  119. rt_size_t buffer_size; /* buffer size */
  120. rt_size_t position; /* current position in buffer */
  121. rt_uint16_t state, event; /* current state and event */
  122. rt_bool_t copy; /* copy text into tmp buffer */
  123. rt_bool_t halt; /* halt parsing of document */
  124. };
  125. rtgui_xml_t* rtgui_xml_create(rt_size_t buffer_size, rtgui_xml_event_handler_t handler,
  126. void* user)
  127. {
  128. rtgui_xml_t* xml = (rtgui_xml_t*) rtgui_malloc(sizeof(struct rtgui_xml));
  129. rt_memset(xml, 0, sizeof(rtgui_xml_t));
  130. xml->event_handler = handler;
  131. xml->user = user;
  132. /* create buffer */
  133. xml->buffer_size = buffer_size;
  134. xml->buffer = (char*)rtgui_malloc(xml->buffer_size);
  135. return xml;
  136. }
  137. void rtgui_xml_destroy(rtgui_xml_t* xml)
  138. {
  139. if(xml)
  140. {
  141. rtgui_free(xml->buffer);
  142. rtgui_free(xml);
  143. }
  144. }
  145. const char* rtgui_xml_event_str(rt_uint8_t event)
  146. {
  147. switch(event)
  148. {
  149. case EVENT_START:
  150. return "start tag";
  151. case EVENT_END:
  152. return "end tag";
  153. case EVENT_TEXT:
  154. return "text";
  155. case EVENT_NAME:
  156. return "attr name";
  157. case EVENT_VAL:
  158. return "attr val";
  159. case EVENT_END_DOC:
  160. return "end document";
  161. default:
  162. break;
  163. }
  164. return "err";
  165. }
  166. int rtgui_xml_parse(rtgui_xml_t* xml, const char* buf, rt_size_t len)
  167. {
  168. int i, j, c, match;
  169. #define is_space(ch) \
  170. ((rt_uint32_t)(ch - 9) < 5u || ch == ' ')
  171. #define is_alpha(ch) \
  172. ((rt_uint32_t)((ch | 0x20) - 'a') < 26u)
  173. #define is_digit(ch) \
  174. ((rt_uint32_t)(ch - '0') < 10u)
  175. #define is_letters(ch) \
  176. (is_alpha(ch) || is_digit(ch) || (ch == '.'))
  177. for(i=0; i<len; i++)
  178. {
  179. if(xml->halt) break;
  180. c = buf[i] & 0xff;
  181. /* search in state table */
  182. for(j=0, match = 0; RTGUI_XML_STATES[j].state != STAT_ERROR; j++)
  183. {
  184. if(RTGUI_XML_STATES[j].state != xml->state)
  185. continue;
  186. switch(RTGUI_XML_STATES[j].class_type)
  187. {
  188. case CLASS_TYPE_LETTERS:
  189. match = is_letters(c);
  190. break;
  191. case CLASS_TYPE_LEFT_ANGLE:
  192. match = (c == '<');
  193. break;
  194. case CLASS_TYPE_SLASH:
  195. match = (c == '/');
  196. break;
  197. case CLASS_TYPE_RIGHT_ANGLE:
  198. match = (c == '>');
  199. break;
  200. case CLASS_TYPE_EQUALS:
  201. match = (c == '=');
  202. break;
  203. case CLASS_TYPE_QUOTE:
  204. match = (c == '"');
  205. break;
  206. case CLASS_TYPE_SPACE:
  207. match = is_space(c);
  208. break;
  209. case CLASS_TYPE_ANY:
  210. match = 1;
  211. break;
  212. default:
  213. break;
  214. }
  215. /* we matched a character class */
  216. if(match)
  217. {
  218. if(RTGUI_XML_STATES[j].event == EVENT_COPY)
  219. {
  220. xml->copy = RT_TRUE;
  221. }
  222. else if(RTGUI_XML_STATES[j].event != EVENT_NONE)
  223. {
  224. if(xml->copy == RT_TRUE)
  225. {
  226. /* basically we are guaranteed never to have an event of
  227. type EVENT_COPY or EVENT_NONE here. */
  228. xml->event = RTGUI_XML_STATES[j].event;
  229. xml->buffer[xml->position] = 0; /* make a string */
  230. if(!xml->event_handler(RTGUI_XML_STATES[j].event,
  231. xml->buffer, xml->position ,
  232. xml->user))
  233. {
  234. xml->halt = 1; /* stop parsing from here out */
  235. }
  236. xml->position = 0;
  237. xml->copy = RT_FALSE;
  238. }
  239. }
  240. if(xml->copy == RT_TRUE)
  241. {
  242. /* check to see if we have room; one less for trailing
  243. nul */
  244. if(xml->position < xml->buffer_size-1)
  245. {
  246. xml->buffer[xml->position] = buf[i];
  247. xml->position++;
  248. }
  249. }
  250. xml->state = RTGUI_XML_STATES[j].next_state; /* change state */
  251. break; /* break out of loop though state search */
  252. }
  253. }
  254. }
  255. return !xml->halt;
  256. }