finsh_token.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. /*
  2. * token lex for finsh shell.
  3. *
  4. * COPYRIGHT (C) 2006 - 2013, RT-Thread Development Team
  5. *
  6. * This file is part of RT-Thread (http://www.rt-thread.org)
  7. * Maintainer: bernard.xiong <bernard.xiong at gmail.com>
  8. *
  9. * All rights reserved.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License along
  22. * with this program; if not, write to the Free Software Foundation, Inc.,
  23. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  24. *
  25. * Change Logs:
  26. * Date Author Notes
  27. * 2010-03-22 Bernard first version
  28. */
  29. #include <finsh.h>
  30. #include "finsh_token.h"
  31. #include "finsh_error.h"
  32. #define is_digit(ch) ((ch) >= '0' && (ch) <= '9')
  33. #define is_separator(ch) !(((ch) >= 'a' && (ch) <= 'z') \
  34. || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_'))
  35. #define is_eof(self) (self)->eof
  36. struct name_table
  37. {
  38. char* name;
  39. enum finsh_token_type type;
  40. };
  41. /* keyword */
  42. static const struct name_table finsh_name_table[] =
  43. {
  44. {"void", finsh_token_type_void},
  45. {"char", finsh_token_type_char},
  46. {"short", finsh_token_type_short},
  47. {"int", finsh_token_type_int},
  48. {"long", finsh_token_type_long},
  49. {"unsigned", finsh_token_type_unsigned},
  50. {"NULL", finsh_token_type_value_null},
  51. {"null", finsh_token_type_value_null}
  52. };
  53. static char token_next_char(struct finsh_token* self);
  54. static void token_prev_char(struct finsh_token* self);
  55. static long token_spec_number(char* string, int length, int b);
  56. static void token_run(struct finsh_token* self);
  57. static int token_match_name(struct finsh_token* self, const char* str);
  58. static void token_proc_number(struct finsh_token* self);
  59. static u_char* token_proc_string(struct finsh_token* self);
  60. static void token_trim_space(struct finsh_token* self);
  61. static char token_proc_char(struct finsh_token* self);
  62. static int token_proc_escape(struct finsh_token* self);
  63. void finsh_token_init(struct finsh_token* self, u_char* line)
  64. {
  65. memset(self, 0, sizeof(struct finsh_token));
  66. self->line = line;
  67. }
  68. enum finsh_token_type finsh_token_token(struct finsh_token* self)
  69. {
  70. if ( self->replay ) self->replay = 0;
  71. else token_run(self);
  72. return (enum finsh_token_type)self->current_token;
  73. }
  74. void finsh_token_get_token(struct finsh_token* self, u_char* token)
  75. {
  76. strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
  77. }
  78. int token_get_string(struct finsh_token* self, u_char* str)
  79. {
  80. unsigned char *p=str;
  81. char ch;
  82. ch = token_next_char(self);
  83. if (is_eof(self)) return -1;
  84. str[0] = '\0';
  85. if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
  86. {
  87. token_prev_char(self);
  88. return -1;
  89. }
  90. while (!is_separator(ch) && !is_eof(self))
  91. {
  92. *p++ = ch;
  93. ch = token_next_char(self);
  94. }
  95. self->eof = 0;
  96. token_prev_char(self);
  97. *p = '\0';
  98. return 0;
  99. }
  100. /*
  101. get next character.
  102. */
  103. static char token_next_char(struct finsh_token* self)
  104. {
  105. if (self->eof) return '\0';
  106. if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
  107. {
  108. self->eof = 1;
  109. self->position = 0;
  110. return '\0';
  111. }
  112. return self->line[self->position++];
  113. }
  114. static void token_prev_char(struct finsh_token* self)
  115. {
  116. if ( self->eof ) return;
  117. if ( self->position == 0 ) return;
  118. else self->position--;
  119. }
  120. static void token_run(struct finsh_token* self)
  121. {
  122. char ch;
  123. token_trim_space(self); /* first trim space and tab. */
  124. token_get_string(self, &(self->string[0]));
  125. if ( is_eof(self) ) /*if it is eof, break;*/
  126. {
  127. self->current_token = finsh_token_type_eof;
  128. return ;
  129. }
  130. if (self->string[0] != '\0') /*It is a key word or a identifier.*/
  131. {
  132. if ( !token_match_name(self, (char*)self->string) )
  133. {
  134. self->current_token = finsh_token_type_identifier;
  135. }
  136. return;
  137. }
  138. else/*It is a operator character.*/
  139. {
  140. ch = token_next_char(self);
  141. switch ( ch )
  142. {
  143. case '(':
  144. self->current_token = finsh_token_type_left_paren;
  145. break;
  146. case ')':
  147. self->current_token = finsh_token_type_right_paren;
  148. break;
  149. case ',':
  150. self->current_token = finsh_token_type_comma;
  151. break;
  152. case ';':
  153. self->current_token = finsh_token_type_semicolon;
  154. break;
  155. case '&':
  156. self->current_token = finsh_token_type_and;
  157. break;
  158. case '*':
  159. self->current_token = finsh_token_type_mul;
  160. break;
  161. case '+':
  162. ch = token_next_char(self);
  163. if ( ch == '+' )
  164. {
  165. self->current_token = finsh_token_type_inc;
  166. }
  167. else
  168. {
  169. token_prev_char(self);
  170. self->current_token = finsh_token_type_add;
  171. }
  172. break;
  173. case '-':
  174. ch = token_next_char(self);
  175. if ( ch == '-' )
  176. {
  177. self->current_token = finsh_token_type_dec;
  178. }
  179. else
  180. {
  181. token_prev_char(self);
  182. self->current_token = finsh_token_type_sub;
  183. }
  184. break;
  185. case '/':
  186. ch = token_next_char(self);
  187. if (ch == '/')
  188. {
  189. /* line comments, set to end of file */
  190. self->current_token = finsh_token_type_eof;
  191. }
  192. else
  193. {
  194. token_prev_char(self);
  195. self->current_token = finsh_token_type_div;
  196. }
  197. break;
  198. case '<':
  199. ch = token_next_char(self);
  200. if ( ch == '<' )
  201. {
  202. self->current_token = finsh_token_type_shl;
  203. }
  204. else
  205. {
  206. token_prev_char(self);
  207. self->current_token = finsh_token_type_bad;
  208. }
  209. break;
  210. case '>':
  211. ch = token_next_char(self);
  212. if ( ch == '>' )
  213. {
  214. self->current_token = finsh_token_type_shr;
  215. }
  216. else
  217. {
  218. token_prev_char(self);
  219. self->current_token = finsh_token_type_bad;
  220. }
  221. break;
  222. case '|':
  223. self->current_token = finsh_token_type_or;
  224. break;
  225. case '%':
  226. self->current_token = finsh_token_type_mod;
  227. break;
  228. case '~':
  229. self->current_token = finsh_token_type_bitwise;
  230. break;
  231. case '^':
  232. self->current_token = finsh_token_type_xor;
  233. break;
  234. case '=':
  235. self->current_token = finsh_token_type_assign;
  236. break;
  237. case '\'':
  238. self->value.char_value = token_proc_char(self);
  239. self->current_token = finsh_token_type_value_char;
  240. break;
  241. case '"':
  242. token_proc_string(self);
  243. self->current_token = finsh_token_type_value_string;
  244. break;
  245. default:
  246. if ( is_digit(ch) )
  247. {
  248. token_prev_char(self);
  249. token_proc_number(self);
  250. break;
  251. }
  252. finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
  253. self->current_token = finsh_token_type_bad;
  254. break;
  255. }
  256. }
  257. }
  258. static int token_match_name(struct finsh_token* self, const char* str)
  259. {
  260. int i;
  261. for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
  262. {
  263. if ( strcmp(finsh_name_table[i].name, str)==0 )
  264. {
  265. self->current_token = finsh_name_table[i].type;
  266. return 1;
  267. }
  268. }
  269. return 0;
  270. }
  271. static void token_trim_space(struct finsh_token* self)
  272. {
  273. char ch;
  274. while ( (ch = token_next_char(self)) ==' ' || ch == '\t');
  275. token_prev_char(self);
  276. }
  277. static char token_proc_char(struct finsh_token* self)
  278. {
  279. char ch;
  280. char buf[4], *p;
  281. p = buf;
  282. ch = token_next_char(self);
  283. if ( ch == '\\' )
  284. {
  285. ch = token_next_char(self);
  286. switch ( ch )
  287. {
  288. case 'n': ch = '\n'; break;
  289. case 't': ch = '\t'; break;
  290. case 'v': ch = '\v'; break;
  291. case 'b': ch = '\b'; break;
  292. case 'r': ch = '\r'; break;
  293. case '\\': ch = '\\'; break;
  294. case '\'': ch = '\''; break;
  295. default :
  296. while ( is_digit(ch) )/*for '\113' char*/
  297. {
  298. ch = token_next_char(self);
  299. *p++ = ch;
  300. }
  301. token_prev_char(self);
  302. *p = '\0';
  303. ch = atoi(p);
  304. break;
  305. }
  306. }
  307. if ( token_next_char(self) != '\'' )
  308. {
  309. token_prev_char(self);
  310. finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
  311. return ch;
  312. }
  313. return ch;
  314. }
  315. static u_char* token_proc_string(struct finsh_token* self)
  316. {
  317. u_char* p;
  318. for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
  319. {
  320. char ch = token_next_char(self);
  321. if ( is_eof(self) )
  322. {
  323. finsh_error_set(FINSH_ERROR_UNEXPECT_END);
  324. return NULL;;
  325. }
  326. if ( ch == '\\' )
  327. {
  328. ch = token_proc_escape(self);
  329. }
  330. else if ( ch == '"' )/*end of string.*/
  331. {
  332. *p = '\0';
  333. return self->string;
  334. }
  335. *p++ = ch;
  336. }
  337. return NULL;
  338. }
  339. static int token_proc_escape(struct finsh_token* self)
  340. {
  341. char ch;
  342. int result=0;
  343. ch = token_next_char(self);
  344. switch (ch)
  345. {
  346. case 'n':
  347. result = '\n';
  348. break;
  349. case 't':
  350. result = '\t';
  351. break;
  352. case 'v':
  353. result = '\v';
  354. break;
  355. case 'b':
  356. result = '\b';
  357. break;
  358. case 'r':
  359. result = '\r';
  360. break;
  361. case 'f':
  362. result = '\f';
  363. break;
  364. case 'a':
  365. result = '\007';
  366. break;
  367. case 'x':
  368. result = 0;
  369. ch = token_next_char(self);
  370. while ( (ch - '0')<16u )
  371. {
  372. result = result*16 + ch - '0';
  373. ch = token_next_char(self);
  374. }
  375. token_prev_char(self);
  376. break;
  377. default:
  378. if ( (ch - '0') < 8u)
  379. {
  380. result = 0;
  381. while ( (ch - '0') < 8u )
  382. {
  383. result = result*8 + ch - '0';
  384. ch = token_next_char(self);
  385. }
  386. token_prev_char(self);
  387. }
  388. break;
  389. }
  390. return result;
  391. }
  392. /*
  393. (0|0x|0X|0b|0B)number+(l|L)
  394. */
  395. static void token_proc_number(struct finsh_token* self)
  396. {
  397. char ch;
  398. int b;
  399. char *p, buf[128];
  400. long value;
  401. value = 0;
  402. p = buf;
  403. b = 10;
  404. ch = token_next_char(self);
  405. if ( ch == '0' )
  406. {
  407. ch = token_next_char(self);
  408. if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
  409. {
  410. b = 16;
  411. ch = token_next_char(self);
  412. while ( is_digit(ch) || isalpha(ch) )
  413. {
  414. *p++ = ch;
  415. ch = token_next_char(self);
  416. }
  417. *p = '\0';
  418. }
  419. else if ( ch == 'b' || ch == 'B' )
  420. {
  421. b = 2;
  422. ch = token_next_char(self);
  423. while ( (ch=='0')||(ch=='1') )
  424. {
  425. *p++ = ch;
  426. ch = token_next_char(self);
  427. }
  428. *p = '\0';
  429. }
  430. else
  431. {
  432. b = 8;
  433. while ( is_digit(ch) )
  434. {
  435. *p++ = ch;
  436. ch = token_next_char(self);
  437. }
  438. *p = '\0';
  439. }
  440. self->value.int_value = token_spec_number(buf, strlen(buf), b);
  441. self->current_token = finsh_token_type_value_int;
  442. }
  443. else
  444. {
  445. while ( is_digit(ch) )
  446. {
  447. value = value*10 + ( ch - '0' );
  448. ch = token_next_char(self);
  449. }
  450. self->value.int_value = value;
  451. self->current_token = finsh_token_type_value_int;
  452. }
  453. switch ( ch )
  454. {
  455. case 'l':
  456. case 'L':
  457. self->current_token = finsh_token_type_value_long;
  458. break;
  459. default:
  460. token_prev_char(self);
  461. break;
  462. }
  463. }
  464. /*use 64 bit number*/
  465. #define BN_SIZE 2
  466. static long token_spec_number(char* string, int length, int b)
  467. {
  468. char* p;
  469. int t;
  470. int i, j, shift=1;
  471. unsigned int bn[BN_SIZE], v;
  472. long d;
  473. p = string;
  474. i = 0;
  475. switch ( b )
  476. {
  477. case 16: shift = 4;
  478. break;
  479. case 8: shift = 3;
  480. break;
  481. case 2: shift = 1;
  482. break;
  483. default: break;
  484. }
  485. for ( j=0; j<BN_SIZE ; j++) bn[j] = 0;
  486. while ( i<length )
  487. {
  488. t = *p++;
  489. if ( t>='a' && t <='f' )
  490. {
  491. t = t - 'a' +10;
  492. }
  493. else if ( t >='A' && t <='F' )
  494. {
  495. t = t - 'A' +10;
  496. }
  497. else t = t - '0';
  498. for ( j=0; j<BN_SIZE ; j++)
  499. {
  500. v = bn[j];
  501. bn[j] = (v<<shift) | t;
  502. t = v >> (32 - shift);
  503. }
  504. i++;
  505. }
  506. d = (long)bn[0];
  507. return d;
  508. }