finsh_token.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585
  1. /*
  2. * File : finsh_token.c
  3. * This file is part of RT-Thread RTOS
  4. * COPYRIGHT (C) 2006 - 2010, RT-Thread Development Team
  5. *
  6. * The license and distribution terms for this file may be
  7. * found in the file LICENSE in this distribution or at
  8. * http://www.rt-thread.org/license/LICENSE
  9. *
  10. * Change Logs:
  11. * Date Author Notes
  12. * 2010-03-22 Bernard first version
  13. */
  14. #include <finsh.h>
  15. #include "finsh_token.h"
  16. #include "finsh_error.h"
  /*
   * Character classification helpers.
   * Each macro may evaluate its argument more than once, so only pass
   * expressions without side effects.
   */

  /* Non-zero when ch is one of the ASCII decimal digits '0'..'9'. */
  #define is_digit(ch) (('0' <= (ch)) && ((ch) <= '9'))

  /* Non-zero when ch is neither an ASCII letter, a decimal digit nor '_',
   * i.e. when it cannot be part of an identifier. */
  #define is_separator(ch) \
      ((((ch) < 'a') || ((ch) > 'z')) \
       && (((ch) < 'A') || ((ch) > 'Z')) \
       && (((ch) < '0') || ((ch) > '9')) \
       && ((ch) != '_'))

  /* Reads the end-of-input flag of the tokenizer that self points to. */
  #define is_eof(self) ((self)->eof)
  21. struct name_table
  22. {
  23. char* name;
  24. enum finsh_token_type type;
  25. };
  26. /* keyword */
  27. static const struct name_table finsh_name_table[] =
  28. {
  29. {"void", finsh_token_type_void},
  30. {"char", finsh_token_type_char},
  31. {"short", finsh_token_type_short},
  32. {"int", finsh_token_type_int},
  33. {"long", finsh_token_type_long},
  34. {"unsigned", finsh_token_type_unsigned},
  35. {"NULL", finsh_token_type_value_null},
  36. {"null", finsh_token_type_value_null}
  37. };
  38. static char token_next_char(struct finsh_token* self);
  39. static void token_prev_char(struct finsh_token* self);
  40. static long token_spec_number(char* string, int length, int b);
  41. static void token_run(struct finsh_token* self);
  42. static int token_match_name(struct finsh_token* self, const char* str);
  43. static void token_proc_number(struct finsh_token* self);
  44. static u_char* token_proc_string(struct finsh_token* self);
  45. static void token_trim_space(struct finsh_token* self);
  46. static char token_proc_char(struct finsh_token* self);
  47. static int token_proc_escape(struct finsh_token* self);
  48. void finsh_token_init(struct finsh_token* self, u_char* line)
  49. {
  50. memset(self, 0, sizeof(struct finsh_token));
  51. self->line = line;
  52. }
  53. enum finsh_token_type finsh_token_token(struct finsh_token* self)
  54. {
  55. if ( self->replay ) self->replay = 0;
  56. else token_run(self);
  57. return (enum finsh_token_type)self->current_token;
  58. }
  59. void finsh_token_get_token(struct finsh_token* self, u_char* token)
  60. {
  61. strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
  62. }
  63. int token_get_string(struct finsh_token* self, u_char* str)
  64. {
  65. unsigned char *p=str;
  66. char ch;
  67. ch = token_next_char(self);
  68. if (is_eof(self)) return -1;
  69. str[0] = '\0';
  70. if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
  71. {
  72. token_prev_char(self);
  73. return -1;
  74. }
  75. while (!is_separator(ch) && !is_eof(self))
  76. {
  77. *p++ = ch;
  78. ch = token_next_char(self);
  79. }
  80. self->eof = 0;
  81. token_prev_char(self);
  82. *p = '\0';
  83. return 0;
  84. }
  85. /*
  86. get next character.
  87. */
  88. static char token_next_char(struct finsh_token* self)
  89. {
  90. if (self->eof) return '\0';
  91. if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
  92. {
  93. self->eof = 1;
  94. self->position = 0;
  95. return '\0';
  96. }
  97. return self->line[self->position++];
  98. }
  99. static void token_prev_char(struct finsh_token* self)
  100. {
  101. if ( self->eof ) return;
  102. if ( self->position == 0 ) return;
  103. else self->position--;
  104. }
  105. static void token_run(struct finsh_token* self)
  106. {
  107. char ch;
  108. token_trim_space(self); /* first trim space and tab. */
  109. token_get_string(self, &(self->string[0]));
  110. if ( is_eof(self) ) /*if it is eof, break;*/
  111. {
  112. self->current_token = finsh_token_type_eof;
  113. return ;
  114. }
  115. if (self->string[0] != '\0') /*It is a key word or a identifier.*/
  116. {
  117. if ( !token_match_name(self, (char*)self->string) )
  118. {
  119. self->current_token = finsh_token_type_identifier;
  120. }
  121. return;
  122. }
  123. else/*It is a operator character.*/
  124. {
  125. ch = token_next_char(self);
  126. switch ( ch )
  127. {
  128. case '(':
  129. self->current_token = finsh_token_type_left_paren;
  130. break;
  131. case ')':
  132. self->current_token = finsh_token_type_right_paren;
  133. break;
  134. case ',':
  135. self->current_token = finsh_token_type_comma;
  136. break;
  137. case ';':
  138. self->current_token = finsh_token_type_semicolon;
  139. break;
  140. case '&':
  141. self->current_token = finsh_token_type_and;
  142. break;
  143. case '*':
  144. self->current_token = finsh_token_type_mul;
  145. break;
  146. case '+':
  147. ch = token_next_char(self);
  148. if ( ch == '+' )
  149. {
  150. self->current_token = finsh_token_type_inc;
  151. }
  152. else
  153. {
  154. token_prev_char(self);
  155. self->current_token = finsh_token_type_add;
  156. }
  157. break;
  158. case '-':
  159. ch = token_next_char(self);
  160. if ( ch == '-' )
  161. {
  162. self->current_token = finsh_token_type_dec;
  163. }
  164. else
  165. {
  166. token_prev_char(self);
  167. self->current_token = finsh_token_type_sub;
  168. }
  169. break;
  170. case '/':
  171. ch = token_next_char(self);
  172. if (ch == '/')
  173. {
  174. /* line comments, set to end of file */
  175. self->current_token = finsh_token_type_eof;
  176. }
  177. else
  178. {
  179. token_prev_char(self);
  180. self->current_token = finsh_token_type_div;
  181. }
  182. break;
  183. case '<':
  184. ch = token_next_char(self);
  185. if ( ch == '<' )
  186. {
  187. self->current_token = finsh_token_type_shl;
  188. }
  189. else
  190. {
  191. token_prev_char(self);
  192. self->current_token = finsh_token_type_bad;
  193. }
  194. break;
  195. case '>':
  196. ch = token_next_char(self);
  197. if ( ch == '>' )
  198. {
  199. self->current_token = finsh_token_type_shr;
  200. }
  201. else
  202. {
  203. token_prev_char(self);
  204. self->current_token = finsh_token_type_bad;
  205. }
  206. break;
  207. case '|':
  208. self->current_token = finsh_token_type_or;
  209. break;
  210. case '%':
  211. self->current_token = finsh_token_type_mod;
  212. break;
  213. case '~':
  214. self->current_token = finsh_token_type_bitwise;
  215. break;
  216. case '^':
  217. self->current_token = finsh_token_type_xor;
  218. break;
  219. case '=':
  220. self->current_token = finsh_token_type_assign;
  221. break;
  222. case '\'':
  223. self->value.char_value = token_proc_char(self);
  224. self->current_token = finsh_token_type_value_char;
  225. break;
  226. case '"':
  227. token_proc_string(self);
  228. self->current_token = finsh_token_type_value_string;
  229. break;
  230. default:
  231. if ( is_digit(ch) )
  232. {
  233. token_prev_char(self);
  234. token_proc_number(self);
  235. break;
  236. }
  237. finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
  238. self->current_token = finsh_token_type_bad;
  239. break;
  240. }
  241. }
  242. }
  243. static int token_match_name(struct finsh_token* self, const char* str)
  244. {
  245. int i;
  246. for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
  247. {
  248. if ( strcmp(finsh_name_table[i].name, str)==0 )
  249. {
  250. self->current_token = finsh_name_table[i].type;
  251. return 1;
  252. }
  253. }
  254. return 0;
  255. }
  /*
   * Skip blanks and tabs.
   * Characters are read until one is neither ' ' nor '\t'; that character
   * is then pushed back so the next read returns it.
   */
  static void token_trim_space(struct finsh_token* self)
  {
      char ch;

      do
      {
          ch = token_next_char(self);
      } while (ch == ' ' || ch == '\t');

      token_prev_char(self);
  }
  262. static char token_proc_char(struct finsh_token* self)
  263. {
  264. char ch;
  265. char buf[4], *p;
  266. p = buf;
  267. ch = token_next_char(self);
  268. if ( ch == '\\' )
  269. {
  270. ch = token_next_char(self);
  271. switch ( ch )
  272. {
  273. case 'n': ch = '\n'; break;
  274. case 't': ch = '\t'; break;
  275. case 'v': ch = '\v'; break;
  276. case 'b': ch = '\b'; break;
  277. case 'r': ch = '\r'; break;
  278. case '\\': ch = '\\'; break;
  279. case '\'': ch = '\''; break;
  280. default :
  281. while ( is_digit(ch) )/*for '\113' char*/
  282. {
  283. ch = token_next_char(self);
  284. *p++ = ch;
  285. }
  286. token_prev_char(self);
  287. *p = '\0';
  288. ch = atoi(p);
  289. break;
  290. }
  291. }
  292. if ( token_next_char(self) != '\'' )
  293. {
  294. token_prev_char(self);
  295. finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
  296. return ch;
  297. }
  298. return ch;
  299. }
  300. static u_char* token_proc_string(struct finsh_token* self)
  301. {
  302. u_char* p;
  303. for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
  304. {
  305. char ch = token_next_char(self);
  306. if ( is_eof(self) )
  307. {
  308. finsh_error_set(FINSH_ERROR_UNEXPECT_END);
  309. return NULL;;
  310. }
  311. if ( ch == '\\' )
  312. {
  313. ch = token_proc_escape(self);
  314. }
  315. else if ( ch == '"' )/*end of string.*/
  316. {
  317. *p = '\0';
  318. return self->string;
  319. }
  320. *p++ = ch;
  321. }
  322. return NULL;
  323. }
  /* Value of a hexadecimal digit (0..15), or -1 when ch is not one. */
  static int token_hex_value(char ch)
  {
      if (ch >= '0' && ch <= '9') return ch - '0';
      if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
      if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
      return -1;
  }

  /*
   * Decode one escape sequence inside a string literal; the backslash has
   * already been consumed by the caller.
   *
   * Supports \n \t \v \b \r \f \a, \x followed by hexadecimal digits, and
   * a run of octal digits. Any other escaped character stands for itself,
   * which covers \\ \" and \'. Numeric results are reduced to their low
   * 8 bits. The first character that does not belong to a numeric escape
   * is pushed back.
   *
   * Fixes over the previous version: \x now accepts a-f / A-F and no
   * longer treats ':' .. '?' as digits; \\ \" \' no longer decode to 0;
   * long digit runs can no longer overflow the int accumulator.
   */
  static int token_proc_escape(struct finsh_token* self)
  {
      char ch;
      int digit;
      int result = 0;

      ch = token_next_char(self);
      switch (ch)
      {
      case 'n': result = '\n';   break;
      case 't': result = '\t';   break;
      case 'v': result = '\v';   break;
      case 'b': result = '\b';   break;
      case 'r': result = '\r';   break;
      case 'f': result = '\f';   break;
      case 'a': result = '\007'; break;

      case 'x':
          ch = token_next_char(self);
          while ((digit = token_hex_value(ch)) >= 0)
          {
              result = ((result << 4) | digit) & 0xff;
              ch = token_next_char(self);
          }
          token_prev_char(self);
          break;

      default:
          if (ch >= '0' && ch <= '7')
          {
              while (ch >= '0' && ch <= '7')
              {
                  result = ((result << 3) | (ch - '0')) & 0xff;
                  ch = token_next_char(self);
              }
              token_prev_char(self);
          }
          else
          {
              /* \\ \" \' and unknown escapes: the character itself */
              result = ch;
          }
          break;
      }

      return result;
  }
  377. /*
  378. (0|0x|0X|0b|0B)number+(l|L)
  379. */
  380. static void token_proc_number(struct finsh_token* self)
  381. {
  382. char ch;
  383. int b;
  384. char *p, buf[128];
  385. long value;
  386. value = 0;
  387. p = buf;
  388. b = 10;
  389. ch = token_next_char(self);
  390. if ( ch == '0' )
  391. {
  392. ch = token_next_char(self);
  393. if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
  394. {
  395. b = 16;
  396. ch = token_next_char(self);
  397. while ( is_digit(ch) || isalpha(ch) )
  398. {
  399. *p++ = ch;
  400. ch = token_next_char(self);
  401. }
  402. *p = '\0';
  403. }
  404. else if ( ch == 'b' || ch == 'B' )
  405. {
  406. b = 2;
  407. ch = token_next_char(self);
  408. while ( (ch=='0')||(ch=='1') )
  409. {
  410. *p++ = ch;
  411. ch = token_next_char(self);
  412. }
  413. *p = '\0';
  414. }
  415. else
  416. {
  417. b = 8;
  418. while ( is_digit(ch) )
  419. {
  420. *p++ = ch;
  421. ch = token_next_char(self);
  422. }
  423. *p = '\0';
  424. }
  425. self->value.int_value = token_spec_number(buf, strlen(buf), b);
  426. self->current_token = finsh_token_type_value_int;
  427. }
  428. else
  429. {
  430. while ( is_digit(ch) )
  431. {
  432. value = value*10 + ( ch - '0' );
  433. ch = token_next_char(self);
  434. }
  435. self->value.int_value = value;
  436. self->current_token = finsh_token_type_value_int;
  437. }
  438. switch ( ch )
  439. {
  440. case 'l':
  441. case 'L':
  442. self->current_token = finsh_token_type_value_long;
  443. break;
  444. default:
  445. token_prev_char(self);
  446. break;
  447. }
  448. }
  /* The value is accumulated in BN_SIZE words, treated as 32 bits each. */
  #define BN_SIZE 2

  /*
   * Convert the first length characters of string, written in base b
   * (16, 8 or 2), to a number.
   *
   * Every character contributes 4, 3 or 1 bits; any other base is handled
   * like base 2. Bits shifted out of one word are carried into the next
   * one, but only the lowest word is returned. The characters are not
   * validated: a-f and A-F map to 10..15 and everything else to its
   * distance from '0'.
   */
  static long token_spec_number(char* string, int length, int b)
  {
      unsigned int words[BN_SIZE];
      unsigned int old;
      int bits;
      int carry;
      int idx;
      int w;

      if (b == 16)
      {
          bits = 4;
      }
      else if (b == 8)
      {
          bits = 3;
      }
      else
      {
          bits = 1;
      }

      for (w = 0; w < BN_SIZE; w++)
      {
          words[w] = 0;
      }

      for (idx = 0; idx < length; idx++)
      {
          /* start with the digit value; it is fed in like a carry from below */
          carry = string[idx];
          if (carry >= 'a' && carry <= 'f')
          {
              carry = carry - 'a' + 10;
          }
          else if (carry >= 'A' && carry <= 'F')
          {
              carry = carry - 'A' + 10;
          }
          else
          {
              carry = carry - '0';
          }

          /* shift each word left and pass the bits that fall out upwards */
          for (w = 0; w < BN_SIZE; w++)
          {
              old = words[w];
              words[w] = (old << bits) | carry;
              carry = old >> (32 - bits);
          }
      }

      return (long)words[0];
  }