finsh_token.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617
  1. /*
  2. * token lex for finsh shell.
  3. *
  4. * COPYRIGHT (C) 2006 - 2013, RT-Thread Development Team
  5. *
  6. * This file is part of RT-Thread (http://www.rt-thread.org)
  7. * Maintainer: bernard.xiong <bernard.xiong at gmail.com>
  8. *
  9. * All rights reserved.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License along
  22. * with this program; if not, write to the Free Software Foundation, Inc.,
  23. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  24. *
  25. * Change Logs:
  26. * Date Author Notes
  27. * 2010-03-22 Bernard first version
  28. * 2013-04-03 Bernard strip more characters.
  29. */
  30. #include <finsh.h>
  31. #include <stdlib.h>
  32. #include "finsh_token.h"
  33. #include "finsh_error.h"
  /*
   * Character classification helpers for the lexer.
   *
   * Every argument and every expansion is fully parenthesised so the macros
   * can be combined with unary and binary operators (e.g. !is_alpha(c))
   * without being re-associated by operator precedence.
   */

  /* ASCII letter: fold to lower case, then a single unsigned range check. */
  #define is_alpha(ch) ((unsigned int)(((ch) | 0x20) - 'a') < 26u)

  /* ASCII decimal digit. */
  #define is_digit(ch) ((ch) >= '0' && (ch) <= '9')

  /* ASCII hexadecimal digit, either case. */
  #define is_xdigit(ch) (((ch) >= '0' && (ch) <= '9') \
      || ((unsigned int)(((ch) | 0x20) - 'a') < 6u))

  /* Anything that cannot be part of an identifier (not a letter, digit or '_'). */
  #define is_separator(ch) (!(((ch) >= 'a' && (ch) <= 'z') \
      || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_')))

  /* End-of-line flag of a tokenizer object. */
  #define is_eof(self) ((self)->eof)
  40. struct name_table
  41. {
  42. char* name;
  43. enum finsh_token_type type;
  44. };
  45. /* keyword */
  46. static const struct name_table finsh_name_table[] =
  47. {
  48. {"void", finsh_token_type_void},
  49. {"char", finsh_token_type_char},
  50. {"short", finsh_token_type_short},
  51. {"int", finsh_token_type_int},
  52. {"long", finsh_token_type_long},
  53. {"unsigned", finsh_token_type_unsigned},
  54. {"NULL", finsh_token_type_value_null},
  55. {"null", finsh_token_type_value_null}
  56. };
  57. static char token_next_char(struct finsh_token* self);
  58. static void token_prev_char(struct finsh_token* self);
  59. static long token_spec_number(char* string, int length, int b);
  60. static void token_run(struct finsh_token* self);
  61. static int token_match_name(struct finsh_token* self, const char* str);
  62. static void token_proc_number(struct finsh_token* self);
  63. static u_char* token_proc_string(struct finsh_token* self);
  64. static void token_trim_space(struct finsh_token* self);
  65. static char token_proc_char(struct finsh_token* self);
  66. static int token_proc_escape(struct finsh_token* self);
  67. void finsh_token_init(struct finsh_token* self, u_char* line)
  68. {
  69. memset(self, 0, sizeof(struct finsh_token));
  70. self->line = line;
  71. }
  72. enum finsh_token_type finsh_token_token(struct finsh_token* self)
  73. {
  74. if ( self->replay ) self->replay = 0;
  75. else token_run(self);
  76. return (enum finsh_token_type)self->current_token;
  77. }
  78. void finsh_token_get_token(struct finsh_token* self, u_char* token)
  79. {
  80. strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
  81. }
  82. int token_get_string(struct finsh_token* self, u_char* str)
  83. {
  84. unsigned char *p=str;
  85. char ch;
  86. ch = token_next_char(self);
  87. if (is_eof(self)) return -1;
  88. str[0] = '\0';
  89. if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
  90. {
  91. token_prev_char(self);
  92. return -1;
  93. }
  94. while (!is_separator(ch) && !is_eof(self))
  95. {
  96. *p++ = ch;
  97. ch = token_next_char(self);
  98. }
  99. self->eof = 0;
  100. token_prev_char(self);
  101. *p = '\0';
  102. return 0;
  103. }
  104. /*
  105. get next character.
  106. */
  107. static char token_next_char(struct finsh_token* self)
  108. {
  109. if (self->eof) return '\0';
  110. if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
  111. {
  112. self->eof = 1;
  113. self->position = 0;
  114. return '\0';
  115. }
  116. return self->line[self->position++];
  117. }
  118. static void token_prev_char(struct finsh_token* self)
  119. {
  120. if ( self->eof ) return;
  121. if ( self->position == 0 ) return;
  122. else self->position--;
  123. }
  124. static void token_run(struct finsh_token* self)
  125. {
  126. char ch;
  127. token_trim_space(self); /* first trim space and tab. */
  128. token_get_string(self, &(self->string[0]));
  129. if ( is_eof(self) ) /*if it is eof, break;*/
  130. {
  131. self->current_token = finsh_token_type_eof;
  132. return ;
  133. }
  134. if (self->string[0] != '\0') /*It is a key word or a identifier.*/
  135. {
  136. if ( !token_match_name(self, (char*)self->string) )
  137. {
  138. self->current_token = finsh_token_type_identifier;
  139. }
  140. }
  141. else/*It is a operator character.*/
  142. {
  143. ch = token_next_char(self);
  144. switch ( ch )
  145. {
  146. case '(':
  147. self->current_token = finsh_token_type_left_paren;
  148. break;
  149. case ')':
  150. self->current_token = finsh_token_type_right_paren;
  151. break;
  152. case ',':
  153. self->current_token = finsh_token_type_comma;
  154. break;
  155. case ';':
  156. self->current_token = finsh_token_type_semicolon;
  157. break;
  158. case '&':
  159. self->current_token = finsh_token_type_and;
  160. break;
  161. case '*':
  162. self->current_token = finsh_token_type_mul;
  163. break;
  164. case '+':
  165. ch = token_next_char(self);
  166. if ( ch == '+' )
  167. {
  168. self->current_token = finsh_token_type_inc;
  169. }
  170. else
  171. {
  172. token_prev_char(self);
  173. self->current_token = finsh_token_type_add;
  174. }
  175. break;
  176. case '-':
  177. ch = token_next_char(self);
  178. if ( ch == '-' )
  179. {
  180. self->current_token = finsh_token_type_dec;
  181. }
  182. else
  183. {
  184. token_prev_char(self);
  185. self->current_token = finsh_token_type_sub;
  186. }
  187. break;
  188. case '/':
  189. ch = token_next_char(self);
  190. if (ch == '/')
  191. {
  192. /* line comments, set to end of file */
  193. self->current_token = finsh_token_type_eof;
  194. }
  195. else
  196. {
  197. token_prev_char(self);
  198. self->current_token = finsh_token_type_div;
  199. }
  200. break;
  201. case '<':
  202. ch = token_next_char(self);
  203. if ( ch == '<' )
  204. {
  205. self->current_token = finsh_token_type_shl;
  206. }
  207. else
  208. {
  209. token_prev_char(self);
  210. self->current_token = finsh_token_type_bad;
  211. }
  212. break;
  213. case '>':
  214. ch = token_next_char(self);
  215. if ( ch == '>' )
  216. {
  217. self->current_token = finsh_token_type_shr;
  218. }
  219. else
  220. {
  221. token_prev_char(self);
  222. self->current_token = finsh_token_type_bad;
  223. }
  224. break;
  225. case '|':
  226. self->current_token = finsh_token_type_or;
  227. break;
  228. case '%':
  229. self->current_token = finsh_token_type_mod;
  230. break;
  231. case '~':
  232. self->current_token = finsh_token_type_bitwise;
  233. break;
  234. case '^':
  235. self->current_token = finsh_token_type_xor;
  236. break;
  237. case '=':
  238. self->current_token = finsh_token_type_assign;
  239. break;
  240. case '\'':
  241. self->value.char_value = token_proc_char(self);
  242. self->current_token = finsh_token_type_value_char;
  243. break;
  244. case '"':
  245. token_proc_string(self);
  246. self->current_token = finsh_token_type_value_string;
  247. break;
  248. default:
  249. if ( is_digit(ch) )
  250. {
  251. token_prev_char(self);
  252. token_proc_number(self);
  253. break;
  254. }
  255. finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
  256. self->current_token = finsh_token_type_bad;
  257. break;
  258. }
  259. }
  260. }
  261. static int token_match_name(struct finsh_token* self, const char* str)
  262. {
  263. int i;
  264. for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
  265. {
  266. if ( strcmp(finsh_name_table[i].name, str)==0 )
  267. {
  268. self->current_token = finsh_name_table[i].type;
  269. return 1;
  270. }
  271. }
  272. return 0;
  273. }
  /*
   * Skip spaces, tabs and carriage returns. The first character that is not
   * one of those is pushed back so the next read returns it.
   */
  static void token_trim_space(struct finsh_token* self)
  {
      char ch;

      do
      {
          ch = token_next_char(self);
      }
      while ( ch == ' ' || ch == '\t' || ch == '\r' );

      token_prev_char(self);
  }
  282. static char token_proc_char(struct finsh_token* self)
  283. {
  284. char ch;
  285. char buf[4], *p;
  286. p = buf;
  287. ch = token_next_char(self);
  288. if ( ch == '\\' )
  289. {
  290. ch = token_next_char(self);
  291. switch ( ch )
  292. {
  293. case 'n': ch = '\n'; break;
  294. case 't': ch = '\t'; break;
  295. case 'v': ch = '\v'; break;
  296. case 'b': ch = '\b'; break;
  297. case 'r': ch = '\r'; break;
  298. case '\\': ch = '\\'; break;
  299. case '\'': ch = '\''; break;
  300. default :
  301. while ( is_digit(ch) )/*for '\113' char*/
  302. {
  303. ch = token_next_char(self);
  304. *p++ = ch;
  305. }
  306. token_prev_char(self);
  307. *p = '\0';
  308. ch = atoi(p);
  309. break;
  310. }
  311. }
  312. if ( token_next_char(self) != '\'' )
  313. {
  314. token_prev_char(self);
  315. finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
  316. return ch;
  317. }
  318. return ch;
  319. }
  320. static u_char* token_proc_string(struct finsh_token* self)
  321. {
  322. u_char* p;
  323. for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
  324. {
  325. char ch = token_next_char(self);
  326. if ( is_eof(self) )
  327. {
  328. finsh_error_set(FINSH_ERROR_UNEXPECT_END);
  329. return NULL;;
  330. }
  331. if ( ch == '\\' )
  332. {
  333. ch = token_proc_escape(self);
  334. }
  335. else if ( ch == '"' )/*end of string.*/
  336. {
  337. *p = '\0';
  338. return self->string;
  339. }
  340. *p++ = ch;
  341. }
  342. return NULL;
  343. }
  /*
   * Decode one escape sequence inside a string literal; the backslash has
   * already been consumed by the caller.
   *
   * Supported: \n \t \v \b \r \f \a \" \\ \' , \x or \X followed by
   * hexadecimal digits, and a run of octal digits. The character that ends
   * a hexadecimal or octal run is pushed back.
   *
   * Returns the decoded value. Any other character after the backslash is
   * consumed and decodes to 0.
   */
  static int token_proc_escape(struct finsh_token* self)
  {
      char ch;
      int result=0;

      ch = token_next_char(self);
      switch (ch)
      {
      case 'n':
          result = '\n';
          break;
      case 't':
          result = '\t';
          break;
      case 'v':
          result = '\v';
          break;
      case 'b':
          result = '\b';
          break;
      case 'r':
          result = '\r';
          break;
      case 'f':
          result = '\f';
          break;
      case 'a':
          result = '\007';
          break;
      case '"':
          result = '"';
          break;
      case '\\':
          /* escaped backslash, as already accepted in character literals */
          result = '\\';
          break;
      case '\'':
          /* escaped single quote, as already accepted in character literals */
          result = '\'';
          break;
      case 'x':
      case 'X':
          result = 0;
          ch = token_next_char(self);
          while (is_xdigit(ch))
          {
              /* '0'..'9' sort below 'A'; letters are folded to lower case */
              result = result * 16 + ((ch < 'A') ? (ch - '0') : (ch | 0x20) - 'a' + 10);
              ch = token_next_char(self);
          }
          token_prev_char(self);
          break;
      default:
          if ( ch >= '0' && ch <= '7' )
          {
              result = 0;
              while ( ch >= '0' && ch <= '7' )
              {
                  result = result*8 + ch - '0';
                  ch = token_next_char(self);
              }
              token_prev_char(self);
          }
          break;
      }

      return result;
  }
  401. /*
  402. (0|0x|0X|0b|0B)number+(l|L)
  403. */
  404. static void token_proc_number(struct finsh_token* self)
  405. {
  406. char ch;
  407. char *p, buf[128];
  408. long value;
  409. value = 0;
  410. p = buf;
  411. ch = token_next_char(self);
  412. if ( ch == '0' )
  413. {
  414. int b;
  415. ch = token_next_char(self);
  416. if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
  417. {
  418. b = 16;
  419. ch = token_next_char(self);
  420. while ( is_digit(ch) || is_alpha(ch) )
  421. {
  422. *p++ = ch;
  423. ch = token_next_char(self);
  424. }
  425. *p = '\0';
  426. }
  427. else if ( ch == 'b' || ch == 'B' )
  428. {
  429. b = 2;
  430. ch = token_next_char(self);
  431. while ( (ch=='0')||(ch=='1') )
  432. {
  433. *p++ = ch;
  434. ch = token_next_char(self);
  435. }
  436. *p = '\0';
  437. }
  438. else if ( '0' <= ch && ch <= '7' )
  439. {
  440. b = 8;
  441. while ( '0' <= ch && ch <= '7' )
  442. {
  443. *p++ = ch;
  444. ch = token_next_char(self);
  445. }
  446. *p = '\0';
  447. }
  448. else
  449. {
  450. token_prev_char(self);
  451. /* made as 0 value */
  452. self->value.int_value = 0;
  453. self->current_token = finsh_token_type_value_int;
  454. return;
  455. }
  456. self->value.int_value = token_spec_number(buf, strlen(buf), b);
  457. self->current_token = finsh_token_type_value_int;
  458. }
  459. else
  460. {
  461. while ( is_digit(ch) )
  462. {
  463. value = value*10 + ( ch - '0' );
  464. ch = token_next_char(self);
  465. }
  466. self->value.int_value = value;
  467. self->current_token = finsh_token_type_value_int;
  468. }
  469. switch ( ch )
  470. {
  471. case 'l':
  472. case 'L':
  473. self->current_token = finsh_token_type_value_long;
  474. break;
  475. default:
  476. token_prev_char(self);
  477. break;
  478. }
  479. }
  /*
   * Convert the digits of a hexadecimal, octal or binary literal to a number.
   *
   * string: the digits, without any "0x"/"0b"/"0" prefix.
   * length: number of characters of string to convert.
   * b:      base; 16 and 8 use 4 and 3 bits per digit, any other value is
   *         treated as base 2 (1 bit per digit).
   *
   * Returns the value accumulated in one unsigned int, i.e. only the low
   * machine word of the literal; digits shifted out at the top are
   * discarded. Characters are not validated here: 'a'-'f' and 'A'-'F' map
   * to 10-15 and every other character is taken as (character - '0'), so
   * the caller is expected to pass valid digits of the base only.
   */
  static long token_spec_number(char* string, int length, int b)
  {
      unsigned int low = 0;
      int shift;
      int i;

      switch ( b )
      {
      case 16:
          shift = 4;
          break;
      case 8:
          shift = 3;
          break;
      default:
          shift = 1;
          break;
      }

      for ( i = 0; i < length; i++ )
      {
          int digit = string[i];

          if ( digit >= 'a' && digit <= 'f' )
          {
              digit = digit - 'a' + 10;
          }
          else if ( digit >= 'A' && digit <= 'F' )
          {
              digit = digit - 'A' + 10;
          }
          else
          {
              digit = digit - '0';
          }

          /* make room for the new digit and append it */
          low = (low << shift) | (unsigned int)digit;
      }

      return (long)low;
  }