finsh_token.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611
  1. /*
  2. * token lex for finsh shell.
  3. *
  4. * COPYRIGHT (C) 2006 - 2013, RT-Thread Development Team
  5. *
  6. * This file is part of RT-Thread (http://www.rt-thread.org)
  7. * Maintainer: bernard.xiong <bernard.xiong at gmail.com>
  8. *
  9. * All rights reserved.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License along
  22. * with this program; if not, write to the Free Software Foundation, Inc.,
  23. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  24. *
  25. * Change Logs:
  26. * Date Author Notes
  27. * 2010-03-22 Bernard first version
  28. * 2013-04-03 Bernard strip more characters.
  29. */
  30. #include <finsh.h>
  31. #include <stdlib.h>
  32. #include "finsh_token.h"
  33. #include "finsh_error.h"
  34. #define is_alpha(ch) ((ch | 0x20) - 'a') < 26u
  35. #define is_digit(ch) ((ch) >= '0' && (ch) <= '9')
  36. #define is_separator(ch) !(((ch) >= 'a' && (ch) <= 'z') \
  37. || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_'))
  38. #define is_eof(self) (self)->eof
  39. struct name_table
  40. {
  41. char* name;
  42. enum finsh_token_type type;
  43. };
  44. /* keyword */
  45. static const struct name_table finsh_name_table[] =
  46. {
  47. {"void", finsh_token_type_void},
  48. {"char", finsh_token_type_char},
  49. {"short", finsh_token_type_short},
  50. {"int", finsh_token_type_int},
  51. {"long", finsh_token_type_long},
  52. {"unsigned", finsh_token_type_unsigned},
  53. {"NULL", finsh_token_type_value_null},
  54. {"null", finsh_token_type_value_null}
  55. };
  56. static char token_next_char(struct finsh_token* self);
  57. static void token_prev_char(struct finsh_token* self);
  58. static long token_spec_number(char* string, int length, int b);
  59. static void token_run(struct finsh_token* self);
  60. static int token_match_name(struct finsh_token* self, const char* str);
  61. static void token_proc_number(struct finsh_token* self);
  62. static u_char* token_proc_string(struct finsh_token* self);
  63. static void token_trim_space(struct finsh_token* self);
  64. static char token_proc_char(struct finsh_token* self);
  65. static int token_proc_escape(struct finsh_token* self);
  66. void finsh_token_init(struct finsh_token* self, u_char* line)
  67. {
  68. memset(self, 0, sizeof(struct finsh_token));
  69. self->line = line;
  70. }
  71. enum finsh_token_type finsh_token_token(struct finsh_token* self)
  72. {
  73. if ( self->replay ) self->replay = 0;
  74. else token_run(self);
  75. return (enum finsh_token_type)self->current_token;
  76. }
  77. void finsh_token_get_token(struct finsh_token* self, u_char* token)
  78. {
  79. strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
  80. }
  81. int token_get_string(struct finsh_token* self, u_char* str)
  82. {
  83. unsigned char *p=str;
  84. char ch;
  85. ch = token_next_char(self);
  86. if (is_eof(self)) return -1;
  87. str[0] = '\0';
  88. if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
  89. {
  90. token_prev_char(self);
  91. return -1;
  92. }
  93. while (!is_separator(ch) && !is_eof(self))
  94. {
  95. *p++ = ch;
  96. ch = token_next_char(self);
  97. }
  98. self->eof = 0;
  99. token_prev_char(self);
  100. *p = '\0';
  101. return 0;
  102. }
  103. /*
  104. get next character.
  105. */
  106. static char token_next_char(struct finsh_token* self)
  107. {
  108. if (self->eof) return '\0';
  109. if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
  110. {
  111. self->eof = 1;
  112. self->position = 0;
  113. return '\0';
  114. }
  115. return self->line[self->position++];
  116. }
  117. static void token_prev_char(struct finsh_token* self)
  118. {
  119. if ( self->eof ) return;
  120. if ( self->position == 0 ) return;
  121. else self->position--;
  122. }
  123. static void token_run(struct finsh_token* self)
  124. {
  125. char ch;
  126. token_trim_space(self); /* first trim space and tab. */
  127. token_get_string(self, &(self->string[0]));
  128. if ( is_eof(self) ) /*if it is eof, break;*/
  129. {
  130. self->current_token = finsh_token_type_eof;
  131. return ;
  132. }
  133. if (self->string[0] != '\0') /*It is a key word or a identifier.*/
  134. {
  135. if ( !token_match_name(self, (char*)self->string) )
  136. {
  137. self->current_token = finsh_token_type_identifier;
  138. }
  139. return;
  140. }
  141. else/*It is a operator character.*/
  142. {
  143. ch = token_next_char(self);
  144. switch ( ch )
  145. {
  146. case '(':
  147. self->current_token = finsh_token_type_left_paren;
  148. break;
  149. case ')':
  150. self->current_token = finsh_token_type_right_paren;
  151. break;
  152. case ',':
  153. self->current_token = finsh_token_type_comma;
  154. break;
  155. case ';':
  156. self->current_token = finsh_token_type_semicolon;
  157. break;
  158. case '&':
  159. self->current_token = finsh_token_type_and;
  160. break;
  161. case '*':
  162. self->current_token = finsh_token_type_mul;
  163. break;
  164. case '+':
  165. ch = token_next_char(self);
  166. if ( ch == '+' )
  167. {
  168. self->current_token = finsh_token_type_inc;
  169. }
  170. else
  171. {
  172. token_prev_char(self);
  173. self->current_token = finsh_token_type_add;
  174. }
  175. break;
  176. case '-':
  177. ch = token_next_char(self);
  178. if ( ch == '-' )
  179. {
  180. self->current_token = finsh_token_type_dec;
  181. }
  182. else
  183. {
  184. token_prev_char(self);
  185. self->current_token = finsh_token_type_sub;
  186. }
  187. break;
  188. case '/':
  189. ch = token_next_char(self);
  190. if (ch == '/')
  191. {
  192. /* line comments, set to end of file */
  193. self->current_token = finsh_token_type_eof;
  194. }
  195. else
  196. {
  197. token_prev_char(self);
  198. self->current_token = finsh_token_type_div;
  199. }
  200. break;
  201. case '<':
  202. ch = token_next_char(self);
  203. if ( ch == '<' )
  204. {
  205. self->current_token = finsh_token_type_shl;
  206. }
  207. else
  208. {
  209. token_prev_char(self);
  210. self->current_token = finsh_token_type_bad;
  211. }
  212. break;
  213. case '>':
  214. ch = token_next_char(self);
  215. if ( ch == '>' )
  216. {
  217. self->current_token = finsh_token_type_shr;
  218. }
  219. else
  220. {
  221. token_prev_char(self);
  222. self->current_token = finsh_token_type_bad;
  223. }
  224. break;
  225. case '|':
  226. self->current_token = finsh_token_type_or;
  227. break;
  228. case '%':
  229. self->current_token = finsh_token_type_mod;
  230. break;
  231. case '~':
  232. self->current_token = finsh_token_type_bitwise;
  233. break;
  234. case '^':
  235. self->current_token = finsh_token_type_xor;
  236. break;
  237. case '=':
  238. self->current_token = finsh_token_type_assign;
  239. break;
  240. case '\'':
  241. self->value.char_value = token_proc_char(self);
  242. self->current_token = finsh_token_type_value_char;
  243. break;
  244. case '"':
  245. token_proc_string(self);
  246. self->current_token = finsh_token_type_value_string;
  247. break;
  248. default:
  249. if ( is_digit(ch) )
  250. {
  251. token_prev_char(self);
  252. token_proc_number(self);
  253. break;
  254. }
  255. finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
  256. self->current_token = finsh_token_type_bad;
  257. break;
  258. }
  259. }
  260. }
  261. static int token_match_name(struct finsh_token* self, const char* str)
  262. {
  263. int i;
  264. for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
  265. {
  266. if ( strcmp(finsh_name_table[i].name, str)==0 )
  267. {
  268. self->current_token = finsh_name_table[i].type;
  269. return 1;
  270. }
  271. }
  272. return 0;
  273. }
  274. static void token_trim_space(struct finsh_token* self)
  275. {
  276. char ch;
  277. #if 0
  278. while ( (ch = token_next_char(self)) ==' ' ||
  279. ch == '\t' ||
  280. ch == '\r' ||
  281. ch == '\n');
  282. #else
  283. while ( (ch = token_next_char(self)) ==' ' ||
  284. ch == '\t');
  285. #endif
  286. token_prev_char(self);
  287. }
  288. static char token_proc_char(struct finsh_token* self)
  289. {
  290. char ch;
  291. char buf[4], *p;
  292. p = buf;
  293. ch = token_next_char(self);
  294. if ( ch == '\\' )
  295. {
  296. ch = token_next_char(self);
  297. switch ( ch )
  298. {
  299. case 'n': ch = '\n'; break;
  300. case 't': ch = '\t'; break;
  301. case 'v': ch = '\v'; break;
  302. case 'b': ch = '\b'; break;
  303. case 'r': ch = '\r'; break;
  304. case '\\': ch = '\\'; break;
  305. case '\'': ch = '\''; break;
  306. default :
  307. while ( is_digit(ch) )/*for '\113' char*/
  308. {
  309. ch = token_next_char(self);
  310. *p++ = ch;
  311. }
  312. token_prev_char(self);
  313. *p = '\0';
  314. ch = atoi(p);
  315. break;
  316. }
  317. }
  318. if ( token_next_char(self) != '\'' )
  319. {
  320. token_prev_char(self);
  321. finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
  322. return ch;
  323. }
  324. return ch;
  325. }
  326. static u_char* token_proc_string(struct finsh_token* self)
  327. {
  328. u_char* p;
  329. for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
  330. {
  331. char ch = token_next_char(self);
  332. if ( is_eof(self) )
  333. {
  334. finsh_error_set(FINSH_ERROR_UNEXPECT_END);
  335. return NULL;;
  336. }
  337. if ( ch == '\\' )
  338. {
  339. ch = token_proc_escape(self);
  340. }
  341. else if ( ch == '"' )/*end of string.*/
  342. {
  343. *p = '\0';
  344. return self->string;
  345. }
  346. *p++ = ch;
  347. }
  348. return NULL;
  349. }
  350. static int token_proc_escape(struct finsh_token* self)
  351. {
  352. char ch;
  353. int result=0;
  354. ch = token_next_char(self);
  355. switch (ch)
  356. {
  357. case 'n':
  358. result = '\n';
  359. break;
  360. case 't':
  361. result = '\t';
  362. break;
  363. case 'v':
  364. result = '\v';
  365. break;
  366. case 'b':
  367. result = '\b';
  368. break;
  369. case 'r':
  370. result = '\r';
  371. break;
  372. case 'f':
  373. result = '\f';
  374. break;
  375. case 'a':
  376. result = '\007';
  377. break;
  378. case 'x':
  379. result = 0;
  380. ch = token_next_char(self);
  381. while ( (ch - '0')<16u )
  382. {
  383. result = result*16 + ch - '0';
  384. ch = token_next_char(self);
  385. }
  386. token_prev_char(self);
  387. break;
  388. default:
  389. if ( (ch - '0') < 8u)
  390. {
  391. result = 0;
  392. while ( (ch - '0') < 8u )
  393. {
  394. result = result*8 + ch - '0';
  395. ch = token_next_char(self);
  396. }
  397. token_prev_char(self);
  398. }
  399. break;
  400. }
  401. return result;
  402. }
  403. /*
  404. (0|0x|0X|0b|0B)number+(l|L)
  405. */
  406. static void token_proc_number(struct finsh_token* self)
  407. {
  408. char ch;
  409. int b;
  410. char *p, buf[128];
  411. long value;
  412. value = 0;
  413. p = buf;
  414. b = 10;
  415. ch = token_next_char(self);
  416. if ( ch == '0' )
  417. {
  418. ch = token_next_char(self);
  419. if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
  420. {
  421. b = 16;
  422. ch = token_next_char(self);
  423. while ( is_digit(ch) || is_alpha(ch) )
  424. {
  425. *p++ = ch;
  426. ch = token_next_char(self);
  427. }
  428. *p = '\0';
  429. }
  430. else if ( ch == 'b' || ch == 'B' )
  431. {
  432. b = 2;
  433. ch = token_next_char(self);
  434. while ( (ch=='0')||(ch=='1') )
  435. {
  436. *p++ = ch;
  437. ch = token_next_char(self);
  438. }
  439. *p = '\0';
  440. }
  441. else
  442. {
  443. b = 8;
  444. while ( is_digit(ch) )
  445. {
  446. *p++ = ch;
  447. ch = token_next_char(self);
  448. }
  449. *p = '\0';
  450. }
  451. self->value.int_value = token_spec_number(buf, strlen(buf), b);
  452. self->current_token = finsh_token_type_value_int;
  453. }
  454. else
  455. {
  456. while ( is_digit(ch) )
  457. {
  458. value = value*10 + ( ch - '0' );
  459. ch = token_next_char(self);
  460. }
  461. self->value.int_value = value;
  462. self->current_token = finsh_token_type_value_int;
  463. }
  464. switch ( ch )
  465. {
  466. case 'l':
  467. case 'L':
  468. self->current_token = finsh_token_type_value_long;
  469. break;
  470. default:
  471. token_prev_char(self);
  472. break;
  473. }
  474. }
  475. /*use 64 bit number*/
  476. #define BN_SIZE 2
  477. static long token_spec_number(char* string, int length, int b)
  478. {
  479. char* p;
  480. int t;
  481. int i, j, shift=1;
  482. unsigned int bn[BN_SIZE], v;
  483. long d;
  484. p = string;
  485. i = 0;
  486. switch ( b )
  487. {
  488. case 16: shift = 4;
  489. break;
  490. case 8: shift = 3;
  491. break;
  492. case 2: shift = 1;
  493. break;
  494. default: break;
  495. }
  496. for ( j=0; j<BN_SIZE ; j++) bn[j] = 0;
  497. while ( i<length )
  498. {
  499. t = *p++;
  500. if ( t>='a' && t <='f' )
  501. {
  502. t = t - 'a' +10;
  503. }
  504. else if ( t >='A' && t <='F' )
  505. {
  506. t = t - 'A' +10;
  507. }
  508. else t = t - '0';
  509. for ( j=0; j<BN_SIZE ; j++)
  510. {
  511. v = bn[j];
  512. bn[j] = (v<<shift) | t;
  513. t = v >> (32 - shift);
  514. }
  515. i++;
  516. }
  517. d = (long)bn[0];
  518. return d;
  519. }