finsh_token.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575
  1. /*
  2. * File : finsh_token.c
  3. * This file is part of RT-Thread RTOS
  4. * COPYRIGHT (C) 2006 - 2010, RT-Thread Development Team
  5. *
  6. * The license and distribution terms for this file may be
  7. * found in the file LICENSE in this distribution or at
  8. * http://www.rt-thread.org/license/LICENSE
  9. *
  10. * Change Logs:
  11. * Date Author Notes
  12. * 2010-03-22 Bernard first version
  13. */
  14. #include <finsh.h>
  15. #include "finsh_token.h"
  16. #include "finsh_error.h"
  /* Character classification helpers used by the tokenizer. */
  /* non-zero when ch is a decimal digit */
  #define is_digit(ch)      (('0' <= (ch)) && ((ch) <= '9'))
  /* non-zero when ch cannot be part of a word (not a letter, digit or '_') */
  #define is_separator(ch)  (!( ((ch) == '_') \
                             || (('0' <= (ch)) && ((ch) <= '9')) \
                             || (('a' <= (ch)) && ((ch) <= 'z')) \
                             || (('A' <= (ch)) && ((ch) <= 'Z')) ))
  /* end-of-input flag of the tokenizer pointed to by self */
  #define is_eof(self)      ((self)->eof)
  21. struct name_table
  22. {
  23. char* name;
  24. enum finsh_token_type type;
  25. };
  26. /* keyword */
  27. static const struct name_table finsh_name_table[] =
  28. {
  29. {"void", finsh_token_type_void},
  30. {"char", finsh_token_type_char},
  31. {"short", finsh_token_type_short},
  32. {"int", finsh_token_type_int},
  33. {"long", finsh_token_type_long},
  34. {"unsigned", finsh_token_type_unsigned},
  35. {"NULL", finsh_token_type_value_null},
  36. {"null", finsh_token_type_value_null}
  37. };
  38. static char token_next_char(struct finsh_token* self);
  39. static void token_prev_char(struct finsh_token* self);
  40. static long token_spec_number(char* string, int length, int b);
  41. static void token_run(struct finsh_token* self);
  42. static int token_match_name(struct finsh_token* self, const char* str);
  43. static void token_proc_number(struct finsh_token* self);
  44. static u_char* token_proc_string(struct finsh_token* self);
  45. static void token_trim_space(struct finsh_token* self);
  46. static char token_proc_char(struct finsh_token* self);
  47. static int token_proc_escape(struct finsh_token* self);
  48. void finsh_token_init(struct finsh_token* self, u_char* line)
  49. {
  50. memset(self, 0, sizeof(struct finsh_token));
  51. self->line = line;
  52. }
  53. enum finsh_token_type finsh_token_token(struct finsh_token* self)
  54. {
  55. if ( self->replay ) self->replay = 0;
  56. else token_run(self);
  57. return (enum finsh_token_type)self->current_token;
  58. }
  59. void finsh_token_get_token(struct finsh_token* self, u_char* token)
  60. {
  61. strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
  62. }
  63. int token_get_string(struct finsh_token* self, u_char* str)
  64. {
  65. unsigned char *p=str;
  66. char ch;
  67. ch = token_next_char(self);
  68. if (is_eof(self)) return -1;
  69. str[0] = '\0';
  70. if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
  71. {
  72. token_prev_char(self);
  73. return -1;
  74. }
  75. while (!is_separator(ch) && !is_eof(self))
  76. {
  77. *p++ = ch;
  78. ch = token_next_char(self);
  79. }
  80. self->eof = 0;
  81. token_prev_char(self);
  82. *p = '\0';
  83. return 0;
  84. }
  85. /*
  86. get next character.
  87. */
  88. static char token_next_char(struct finsh_token* self)
  89. {
  90. if (self->eof) return '\0';
  91. if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
  92. {
  93. self->eof = 1;
  94. self->position = 0;
  95. return '\0';
  96. }
  97. return self->line[self->position++];
  98. }
  99. static void token_prev_char(struct finsh_token* self)
  100. {
  101. if ( self->eof ) return;
  102. if ( self->position == 0 ) return;
  103. else self->position--;
  104. }
  105. static void token_run(struct finsh_token* self)
  106. {
  107. char ch;
  108. token_trim_space(self); /* first trim space and tab. */
  109. token_get_string(self, &(self->string[0]));
  110. if ( is_eof(self) ) /*if it is eof, break;*/
  111. {
  112. self->current_token = finsh_token_type_eof;
  113. return ;
  114. }
  115. if (self->string[0] != '\0') /*It is a key word or a identifier.*/
  116. {
  117. if ( !token_match_name(self, (char*)self->string) )
  118. {
  119. self->current_token = finsh_token_type_identifier;
  120. }
  121. return;
  122. }
  123. else/*It is a operator character.*/
  124. {
  125. ch = token_next_char(self);
  126. switch ( ch )
  127. {
  128. case '(':
  129. self->current_token = finsh_token_type_left_paren;
  130. break;
  131. case ')':
  132. self->current_token = finsh_token_type_right_paren;
  133. break;
  134. case ',':
  135. self->current_token = finsh_token_type_comma;
  136. break;
  137. case ';':
  138. self->current_token = finsh_token_type_semicolon;
  139. break;
  140. case '&':
  141. self->current_token = finsh_token_type_and;
  142. break;
  143. case '*':
  144. self->current_token = finsh_token_type_mul;
  145. break;
  146. case '+':
  147. ch = token_next_char(self);
  148. if ( ch == '+' )
  149. {
  150. self->current_token = finsh_token_type_inc;
  151. }
  152. else
  153. {
  154. token_prev_char(self);
  155. self->current_token = finsh_token_type_add;
  156. }
  157. break;
  158. case '-':
  159. ch = token_next_char(self);
  160. if ( ch == '-' )
  161. {
  162. self->current_token = finsh_token_type_dec;
  163. }
  164. else
  165. {
  166. token_prev_char(self);
  167. self->current_token = finsh_token_type_sub;
  168. }
  169. break;
  170. case '/':
  171. self->current_token = finsh_token_type_div;
  172. break;
  173. case '<':
  174. ch = token_next_char(self);
  175. if ( ch == '<' )
  176. {
  177. self->current_token = finsh_token_type_shl;
  178. }
  179. else
  180. {
  181. token_prev_char(self);
  182. self->current_token = finsh_token_type_bad;
  183. }
  184. break;
  185. case '>':
  186. ch = token_next_char(self);
  187. if ( ch == '>' )
  188. {
  189. self->current_token = finsh_token_type_shr;
  190. }
  191. else
  192. {
  193. token_prev_char(self);
  194. self->current_token = finsh_token_type_bad;
  195. }
  196. break;
  197. case '|':
  198. self->current_token = finsh_token_type_or;
  199. break;
  200. case '%':
  201. self->current_token = finsh_token_type_mod;
  202. break;
  203. case '~':
  204. self->current_token = finsh_token_type_bitwise;
  205. break;
  206. case '^':
  207. self->current_token = finsh_token_type_xor;
  208. break;
  209. case '=':
  210. self->current_token = finsh_token_type_assign;
  211. break;
  212. case '\'':
  213. self->value.char_value = token_proc_char(self);
  214. self->current_token = finsh_token_type_value_char;
  215. break;
  216. case '"':
  217. token_proc_string(self);
  218. self->current_token = finsh_token_type_value_string;
  219. break;
  220. default:
  221. if ( is_digit(ch) )
  222. {
  223. token_prev_char(self);
  224. token_proc_number(self);
  225. break;
  226. }
  227. finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
  228. self->current_token = finsh_token_type_bad;
  229. break;
  230. }
  231. }
  232. }
  233. static int token_match_name(struct finsh_token* self, const char* str)
  234. {
  235. int i;
  236. for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
  237. {
  238. if ( strcmp(finsh_name_table[i].name, str)==0 )
  239. {
  240. self->current_token = finsh_name_table[i].type;
  241. return 1;
  242. }
  243. }
  244. return 0;
  245. }
  /*
   * Skip blanks and tabs, leaving the cursor on the first character that
   * is neither.
   */
  static void token_trim_space(struct finsh_token* self)
  {
      for (;;)
      {
          char c = token_next_char(self);
          if ( c != ' ' && c != '\t' ) break;
      }

      /* the loop consumed one character too many; hand it back */
      token_prev_char(self);
  }
  252. static char token_proc_char(struct finsh_token* self)
  253. {
  254. char ch;
  255. char buf[4], *p;
  256. p = buf;
  257. ch = token_next_char(self);
  258. if ( ch == '\\' )
  259. {
  260. ch = token_next_char(self);
  261. switch ( ch )
  262. {
  263. case 'n': ch = '\n'; break;
  264. case 't': ch = '\t'; break;
  265. case 'v': ch = '\v'; break;
  266. case 'b': ch = '\b'; break;
  267. case 'r': ch = '\r'; break;
  268. case '\\': ch = '\\'; break;
  269. case '\'': ch = '\''; break;
  270. default :
  271. while ( is_digit(ch) )/*for '\113' char*/
  272. {
  273. ch = token_next_char(self);
  274. *p++ = ch;
  275. }
  276. token_prev_char(self);
  277. *p = '\0';
  278. ch = atoi(p);
  279. break;
  280. }
  281. }
  282. if ( token_next_char(self) != '\'' )
  283. {
  284. token_prev_char(self);
  285. finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
  286. return ch;
  287. }
  288. return ch;
  289. }
  290. static u_char* token_proc_string(struct finsh_token* self)
  291. {
  292. u_char* p;
  293. for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
  294. {
  295. char ch = token_next_char(self);
  296. if ( is_eof(self) )
  297. {
  298. finsh_error_set(FINSH_ERROR_UNEXPECT_END);
  299. return NULL;;
  300. }
  301. if ( ch == '\\' )
  302. {
  303. ch = token_proc_escape(self);
  304. }
  305. else if ( ch == '"' )/*end of string.*/
  306. {
  307. *p = '\0';
  308. return self->string;
  309. }
  310. *p++ = ch;
  311. }
  312. return NULL;
  313. }
  /*
   * Decode one escape sequence; the backslash has already been consumed.
   * Returns the value of the escaped character.
   *
   *   \n \t \v \b \r \f \a   the usual control characters
   *   \xH...                 hexadecimal, any number of digits (0-9, a-f,
   *                          A-F); only the low 8 bits are kept; no digits
   *                          yields 0
   *   \o, \oo, \ooo          octal, at most three digits as in C
   *   anything else          the character itself, so that \\ \" and \'
   *                          produce a backslash or a quote
   *
   * The character that ends a numeric escape is pushed back.
   *
   * Compared with the previous implementation: the hexadecimal loop tested
   * (ch - '0') < 16, which accepted ':' ';' '<' '=' '>' '?' and rejected
   * the letters a-f; unknown escapes (including \\ and \") returned 0 and
   * so planted a terminator in the middle of the string; the octal run was
   * unbounded.
   */
  static int token_proc_escape(struct finsh_token* self)
  {
      char ch;
      int result = 0;

      ch = token_next_char(self);
      switch (ch)
      {
      case 'n': result = '\n';   break;
      case 't': result = '\t';   break;
      case 'v': result = '\v';   break;
      case 'b': result = '\b';   break;
      case 'r': result = '\r';   break;
      case 'f': result = '\f';   break;
      case 'a': result = '\007'; break;

      case 'x':
      {
          unsigned int acc = 0;   /* unsigned: a long digit run wraps instead of overflowing */

          ch = token_next_char(self);
          for (;;)
          {
              unsigned int digit;

              if ( ch >= '0' && ch <= '9' )      digit = (unsigned int)(ch - '0');
              else if ( ch >= 'a' && ch <= 'f' ) digit = (unsigned int)(ch - 'a') + 10u;
              else if ( ch >= 'A' && ch <= 'F' ) digit = (unsigned int)(ch - 'A') + 10u;
              else break;

              acc = acc*16u + digit;
              ch = token_next_char(self);
          }
          token_prev_char(self);  /* un-read the character after the digits */
          result = (int)(acc & 0xFFu);
          break;
      }

      default:
          if ( ch >= '0' && ch <= '7' )
          {
              int count = 0;

              while ( count < 3 && ch >= '0' && ch <= '7' )
              {
                  result = result*8 + (ch - '0');
                  count++;
                  ch = token_next_char(self);
              }
              token_prev_char(self);  /* un-read the character after the digits */
          }
          else
          {
              /* \\ \" \' and unknown escapes stand for the character itself;
                 at end of input ch is '\0', which still yields 0 */
              result = ch;
          }
          break;
      }

      return result;
  }
  367. /*
  368. (0|0x|0X|0b|0B)number+(l|L)
  369. */
  370. static void token_proc_number(struct finsh_token* self)
  371. {
  372. char ch;
  373. int b;
  374. char *p, buf[128];
  375. long value;
  376. value = 0;
  377. p = buf;
  378. b = 10;
  379. ch = token_next_char(self);
  380. if ( ch == '0' )
  381. {
  382. ch = token_next_char(self);
  383. if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
  384. {
  385. b = 16;
  386. ch = token_next_char(self);
  387. while ( is_digit(ch) || isalpha(ch) )
  388. {
  389. *p++ = ch;
  390. ch = token_next_char(self);
  391. }
  392. *p = '\0';
  393. }
  394. else if ( ch == 'b' || ch == 'B' )
  395. {
  396. b = 2;
  397. ch = token_next_char(self);
  398. while ( (ch=='0')||(ch=='1') )
  399. {
  400. *p++ = ch;
  401. ch = token_next_char(self);
  402. }
  403. *p = '\0';
  404. }
  405. else
  406. {
  407. b = 8;
  408. while ( is_digit(ch) )
  409. {
  410. *p++ = ch;
  411. ch = token_next_char(self);
  412. }
  413. *p = '\0';
  414. }
  415. self->value.int_value = token_spec_number(buf, strlen(buf), b);
  416. self->current_token = finsh_token_type_value_int;
  417. }
  418. else
  419. {
  420. while ( is_digit(ch) )
  421. {
  422. value = value*10 + ( ch - '0' );
  423. ch = token_next_char(self);
  424. }
  425. self->value.int_value = value;
  426. self->current_token = finsh_token_type_value_int;
  427. }
  428. switch ( ch )
  429. {
  430. case 'l':
  431. case 'L':
  432. self->current_token = finsh_token_type_value_long;
  433. break;
  434. default:
  435. token_prev_char(self);
  436. break;
  437. }
  438. }
  /* number of 32-bit words in the accumulator (2 words = 64 bits) */
  #define BN_SIZE 2
  /*
   * Convert the first length characters of string, written in base b
   * (16, 8 or 2), to a number.
   *
   * Each digit is shifted into a BN_SIZE-word accumulator, 4 bits per digit
   * for base 16, 3 for base 8 and 1 for base 2 or any other base. Letters
   * a-f / A-F count as 10..15; every other character is taken as
   * (character - '0') without validation. Only the lowest word of the
   * accumulator is returned.
   */
  static long token_spec_number(char* string, int length, int b)
  {
      unsigned int words[BN_SIZE];
      unsigned int old;
      int bits;
      int carry;
      int idx, w;

      /* bits contributed by one digit */
      if ( b == 16 )     bits = 4;
      else if ( b == 8 ) bits = 3;
      else               bits = 1;

      for ( w = 0; w < BN_SIZE; w++ ) words[w] = 0;

      for ( idx = 0; idx < length; idx++ )
      {
          /* digit value; it enters the lowest word like a carry */
          carry = string[idx];
          if ( carry >= 'a' && carry <= 'f' )      carry = carry - 'a' + 10;
          else if ( carry >= 'A' && carry <= 'F' ) carry = carry - 'A' + 10;
          else                                     carry = carry - '0';

          /* shift every word left, passing the bits shifted out upwards */
          for ( w = 0; w < BN_SIZE; w++ )
          {
              old = words[w];
              words[w] = (old << bits) | carry;
              carry = old >> (32 - bits);
          }
      }

      return (long)words[0];
  }