finsh_token.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. /*
  2. * token lex for finsh shell.
  3. *
  4. * COPYRIGHT (C) 2006 - 2013, RT-Thread Development Team
  5. *
  6. * This file is part of RT-Thread (http://www.rt-thread.org)
  7. * Maintainer: bernard.xiong <bernard.xiong at gmail.com>
  8. *
  9. * All rights reserved.
  10. *
  11. * This program is free software; you can redistribute it and/or modify
  12. * it under the terms of the GNU General Public License as published by
  13. * the Free Software Foundation; either version 2 of the License, or
  14. * (at your option) any later version.
  15. *
  16. * This program is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  19. * GNU General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU General Public License along
  22. * with this program; if not, write to the Free Software Foundation, Inc.,
  23. * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  24. *
  25. * Change Logs:
  26. * Date Author Notes
  27. * 2010-03-22 Bernard first version
  28. * 2013-04-03 Bernard strip more characters.
  29. */
  30. #include <finsh.h>
  31. #include <stdlib.h>
  32. #include "finsh_token.h"
  33. #include "finsh_error.h"
  /*
   * Character-class helpers used by the lexer.
   *
   * Every argument and every expansion is fully parenthesized so the macros can
   * be negated or combined with other operators without precedence surprises
   * (an unparenthesized expansion turns `!is_alpha(c)` into `(!(...)) < 26u`).
   */

  /* true when ch is an ASCII letter; OR-ing with 0x20 folds upper case onto lower case */
  #define is_alpha(ch)        ((((ch) | 0x20) - 'a') < 26u)

  /* true when ch is an ASCII decimal digit */
  #define is_digit(ch)        ((ch) >= '0' && (ch) <= '9')

  /* true when ch cannot be part of an identifier (not a letter, digit or '_') */
  #define is_separator(ch)    (!(((ch) >= 'a' && (ch) <= 'z') \
                              || ((ch) >= 'A' && (ch) <= 'Z') \
                              || ((ch) >= '0' && (ch) <= '9') \
                              || ((ch) == '_')))

  /* value of the end-of-input flag of the lexer pointed to by self */
  #define is_eof(self)        ((self)->eof)
  39. struct name_table
  40. {
  41. char* name;
  42. enum finsh_token_type type;
  43. };
  44. /* keyword */
  45. static const struct name_table finsh_name_table[] =
  46. {
  47. {"void", finsh_token_type_void},
  48. {"char", finsh_token_type_char},
  49. {"short", finsh_token_type_short},
  50. {"int", finsh_token_type_int},
  51. {"long", finsh_token_type_long},
  52. {"unsigned", finsh_token_type_unsigned},
  53. {"NULL", finsh_token_type_value_null},
  54. {"null", finsh_token_type_value_null}
  55. };
  56. static char token_next_char(struct finsh_token* self);
  57. static void token_prev_char(struct finsh_token* self);
  58. static long token_spec_number(char* string, int length, int b);
  59. static void token_run(struct finsh_token* self);
  60. static int token_match_name(struct finsh_token* self, const char* str);
  61. static void token_proc_number(struct finsh_token* self);
  62. static u_char* token_proc_string(struct finsh_token* self);
  63. static void token_trim_space(struct finsh_token* self);
  64. static char token_proc_char(struct finsh_token* self);
  65. static int token_proc_escape(struct finsh_token* self);
  66. void finsh_token_init(struct finsh_token* self, u_char* line)
  67. {
  68. memset(self, 0, sizeof(struct finsh_token));
  69. self->line = line;
  70. }
  71. enum finsh_token_type finsh_token_token(struct finsh_token* self)
  72. {
  73. if ( self->replay ) self->replay = 0;
  74. else token_run(self);
  75. return (enum finsh_token_type)self->current_token;
  76. }
  77. void finsh_token_get_token(struct finsh_token* self, u_char* token)
  78. {
  79. strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
  80. }
  81. int token_get_string(struct finsh_token* self, u_char* str)
  82. {
  83. unsigned char *p=str;
  84. char ch;
  85. ch = token_next_char(self);
  86. if (is_eof(self)) return -1;
  87. str[0] = '\0';
  88. if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
  89. {
  90. token_prev_char(self);
  91. return -1;
  92. }
  93. while (!is_separator(ch) && !is_eof(self))
  94. {
  95. *p++ = ch;
  96. ch = token_next_char(self);
  97. }
  98. self->eof = 0;
  99. token_prev_char(self);
  100. *p = '\0';
  101. return 0;
  102. }
  103. /*
  104. get next character.
  105. */
  106. static char token_next_char(struct finsh_token* self)
  107. {
  108. if (self->eof) return '\0';
  109. if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
  110. {
  111. self->eof = 1;
  112. self->position = 0;
  113. return '\0';
  114. }
  115. return self->line[self->position++];
  116. }
  117. static void token_prev_char(struct finsh_token* self)
  118. {
  119. if ( self->eof ) return;
  120. if ( self->position == 0 ) return;
  121. else self->position--;
  122. }
  123. static void token_run(struct finsh_token* self)
  124. {
  125. char ch;
  126. token_trim_space(self); /* first trim space and tab. */
  127. token_get_string(self, &(self->string[0]));
  128. if ( is_eof(self) ) /*if it is eof, break;*/
  129. {
  130. self->current_token = finsh_token_type_eof;
  131. return ;
  132. }
  133. if (self->string[0] != '\0') /*It is a key word or a identifier.*/
  134. {
  135. if ( !token_match_name(self, (char*)self->string) )
  136. {
  137. self->current_token = finsh_token_type_identifier;
  138. }
  139. }
  140. else/*It is a operator character.*/
  141. {
  142. ch = token_next_char(self);
  143. switch ( ch )
  144. {
  145. case '(':
  146. self->current_token = finsh_token_type_left_paren;
  147. break;
  148. case ')':
  149. self->current_token = finsh_token_type_right_paren;
  150. break;
  151. case ',':
  152. self->current_token = finsh_token_type_comma;
  153. break;
  154. case ';':
  155. self->current_token = finsh_token_type_semicolon;
  156. break;
  157. case '&':
  158. self->current_token = finsh_token_type_and;
  159. break;
  160. case '*':
  161. self->current_token = finsh_token_type_mul;
  162. break;
  163. case '+':
  164. ch = token_next_char(self);
  165. if ( ch == '+' )
  166. {
  167. self->current_token = finsh_token_type_inc;
  168. }
  169. else
  170. {
  171. token_prev_char(self);
  172. self->current_token = finsh_token_type_add;
  173. }
  174. break;
  175. case '-':
  176. ch = token_next_char(self);
  177. if ( ch == '-' )
  178. {
  179. self->current_token = finsh_token_type_dec;
  180. }
  181. else
  182. {
  183. token_prev_char(self);
  184. self->current_token = finsh_token_type_sub;
  185. }
  186. break;
  187. case '/':
  188. ch = token_next_char(self);
  189. if (ch == '/')
  190. {
  191. /* line comments, set to end of file */
  192. self->current_token = finsh_token_type_eof;
  193. }
  194. else
  195. {
  196. token_prev_char(self);
  197. self->current_token = finsh_token_type_div;
  198. }
  199. break;
  200. case '<':
  201. ch = token_next_char(self);
  202. if ( ch == '<' )
  203. {
  204. self->current_token = finsh_token_type_shl;
  205. }
  206. else
  207. {
  208. token_prev_char(self);
  209. self->current_token = finsh_token_type_bad;
  210. }
  211. break;
  212. case '>':
  213. ch = token_next_char(self);
  214. if ( ch == '>' )
  215. {
  216. self->current_token = finsh_token_type_shr;
  217. }
  218. else
  219. {
  220. token_prev_char(self);
  221. self->current_token = finsh_token_type_bad;
  222. }
  223. break;
  224. case '|':
  225. self->current_token = finsh_token_type_or;
  226. break;
  227. case '%':
  228. self->current_token = finsh_token_type_mod;
  229. break;
  230. case '~':
  231. self->current_token = finsh_token_type_bitwise;
  232. break;
  233. case '^':
  234. self->current_token = finsh_token_type_xor;
  235. break;
  236. case '=':
  237. self->current_token = finsh_token_type_assign;
  238. break;
  239. case '\'':
  240. self->value.char_value = token_proc_char(self);
  241. self->current_token = finsh_token_type_value_char;
  242. break;
  243. case '"':
  244. token_proc_string(self);
  245. self->current_token = finsh_token_type_value_string;
  246. break;
  247. default:
  248. if ( is_digit(ch) )
  249. {
  250. token_prev_char(self);
  251. token_proc_number(self);
  252. break;
  253. }
  254. finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
  255. self->current_token = finsh_token_type_bad;
  256. break;
  257. }
  258. }
  259. }
  260. static int token_match_name(struct finsh_token* self, const char* str)
  261. {
  262. int i;
  263. for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
  264. {
  265. if ( strcmp(finsh_name_table[i].name, str)==0 )
  266. {
  267. self->current_token = finsh_name_table[i].type;
  268. return 1;
  269. }
  270. }
  271. return 0;
  272. }
  /*
   * Skip spaces, tabs and carriage returns, leaving the read position on the
   * first character that is none of them.
   */
  static void token_trim_space(struct finsh_token* self)
  {
      char ch;

      do
      {
          ch = token_next_char(self);
      }
      while (ch == ' ' || ch == '\t' || ch == '\r');

      /* the loop read one character too many; give it back */
      token_prev_char(self);
  }
  281. static char token_proc_char(struct finsh_token* self)
  282. {
  283. char ch;
  284. char buf[4], *p;
  285. p = buf;
  286. ch = token_next_char(self);
  287. if ( ch == '\\' )
  288. {
  289. ch = token_next_char(self);
  290. switch ( ch )
  291. {
  292. case 'n': ch = '\n'; break;
  293. case 't': ch = '\t'; break;
  294. case 'v': ch = '\v'; break;
  295. case 'b': ch = '\b'; break;
  296. case 'r': ch = '\r'; break;
  297. case '\\': ch = '\\'; break;
  298. case '\'': ch = '\''; break;
  299. default :
  300. while ( is_digit(ch) )/*for '\113' char*/
  301. {
  302. ch = token_next_char(self);
  303. *p++ = ch;
  304. }
  305. token_prev_char(self);
  306. *p = '\0';
  307. ch = atoi(p);
  308. break;
  309. }
  310. }
  311. if ( token_next_char(self) != '\'' )
  312. {
  313. token_prev_char(self);
  314. finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
  315. return ch;
  316. }
  317. return ch;
  318. }
  319. static u_char* token_proc_string(struct finsh_token* self)
  320. {
  321. u_char* p;
  322. for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
  323. {
  324. char ch = token_next_char(self);
  325. if ( is_eof(self) )
  326. {
  327. finsh_error_set(FINSH_ERROR_UNEXPECT_END);
  328. return NULL;;
  329. }
  330. if ( ch == '\\' )
  331. {
  332. ch = token_proc_escape(self);
  333. }
  334. else if ( ch == '"' )/*end of string.*/
  335. {
  336. *p = '\0';
  337. return self->string;
  338. }
  339. *p++ = ch;
  340. }
  341. return NULL;
  342. }
  /*
   * Translate one escape sequence of a string literal; the backslash has
   * already been consumed by the caller.
   *
   * Handles \n \t \v \b \r \f \a, the quoting escapes \\ \' \" \?, hexadecimal
   * (\x followed by hex digits) and octal (\ followed by octal digits) escapes.
   * Any other escape character yields 0.
   *
   * Returns the character value in the range 0..255.
   *
   * Changes from the earlier behaviour:
   *  - \x accepts 0-9, a-f and A-F. The old range test treated the six
   *    characters after '9' (':' to '?') as digits and rejected the letters.
   *  - \\ \' \" and \? produce the quoted character instead of 0.
   *  - numeric escapes are reduced to their low 8 bits while they are
   *    accumulated, so a long digit run can no longer overflow the int.
   */
  static int token_proc_escape(struct finsh_token* self)
  {
      char ch;
      int result = 0;

      ch = token_next_char(self);
      switch (ch)
      {
      case 'n':  result = '\n';   break;
      case 't':  result = '\t';   break;
      case 'v':  result = '\v';   break;
      case 'b':  result = '\b';   break;
      case 'r':  result = '\r';   break;
      case 'f':  result = '\f';   break;
      case 'a':  result = '\007'; break;

      /* characters that only need the backslash to lose their special meaning */
      case '\\':
      case '\'':
      case '"':
      case '?':
          result = ch;
          break;

      case 'x':
          ch = token_next_char(self);
          for (;;)
          {
              int digit;

              if (is_digit(ch))                digit = ch - '0';
              else if (ch >= 'a' && ch <= 'f') digit = ch - 'a' + 10;
              else if (ch >= 'A' && ch <= 'F') digit = ch - 'A' + 10;
              else break;

              result = ((result << 4) | digit) & 0xFF;
              ch = token_next_char(self);
          }
          /* give back the character that ended the hex digits */
          token_prev_char(self);
          break;

      default:
          if (ch >= '0' && ch <= '7')
          {
              while (ch >= '0' && ch <= '7')
              {
                  result = ((result << 3) | (ch - '0')) & 0xFF;
                  ch = token_next_char(self);
              }
              /* give back the character that ended the octal digits */
              token_prev_char(self);
          }
          break;
      }

      return result;
  }
  396. /*
  397. (0|0x|0X|0b|0B)number+(l|L)
  398. */
  399. static void token_proc_number(struct finsh_token* self)
  400. {
  401. char ch;
  402. char *p, buf[128];
  403. long value;
  404. value = 0;
  405. p = buf;
  406. ch = token_next_char(self);
  407. if ( ch == '0' )
  408. {
  409. int b;
  410. ch = token_next_char(self);
  411. if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
  412. {
  413. b = 16;
  414. ch = token_next_char(self);
  415. while ( is_digit(ch) || is_alpha(ch) )
  416. {
  417. *p++ = ch;
  418. ch = token_next_char(self);
  419. }
  420. *p = '\0';
  421. }
  422. else if ( ch == 'b' || ch == 'B' )
  423. {
  424. b = 2;
  425. ch = token_next_char(self);
  426. while ( (ch=='0')||(ch=='1') )
  427. {
  428. *p++ = ch;
  429. ch = token_next_char(self);
  430. }
  431. *p = '\0';
  432. }
  433. else if ( '0' <= ch && ch <= '7' )
  434. {
  435. b = 8;
  436. while ( '0' <= ch && ch <= '7' )
  437. {
  438. *p++ = ch;
  439. ch = token_next_char(self);
  440. }
  441. *p = '\0';
  442. }
  443. else
  444. {
  445. token_prev_char(self);
  446. /* made as 0 value */
  447. self->value.int_value = 0;
  448. self->current_token = finsh_token_type_value_int;
  449. return;
  450. }
  451. self->value.int_value = token_spec_number(buf, strlen(buf), b);
  452. self->current_token = finsh_token_type_value_int;
  453. }
  454. else
  455. {
  456. while ( is_digit(ch) )
  457. {
  458. value = value*10 + ( ch - '0' );
  459. ch = token_next_char(self);
  460. }
  461. self->value.int_value = value;
  462. self->current_token = finsh_token_type_value_int;
  463. }
  464. switch ( ch )
  465. {
  466. case 'l':
  467. case 'L':
  468. self->current_token = finsh_token_type_value_long;
  469. break;
  470. default:
  471. token_prev_char(self);
  472. break;
  473. }
  474. }
  /* number of unsigned int words in the accumulator (64 bits with 32-bit words) */
  #define BN_SIZE 2

  /*
   * Convert a digit string in base 2, 8 or 16 to a number.
   *
   * string: digits without any prefix; need not be NUL-terminated.
   * length: number of characters of string to convert.
   * b:      radix (16, 8 or 2; any other value is shifted like base 2).
   *
   * Conversion stops at the first character that is not a valid digit for the
   * radix. Such characters used to be merged into the accumulator as if they
   * were digits, which corrupted the result. A NULL string or a length of zero
   * or less yields 0.
   *
   * The value is accumulated in BN_SIZE words, but only the least significant
   * word is returned, converted to long; higher bits are discarded.
   * NOTE(review): the carry computation assumes that unsigned int is 32 bits
   * wide, as the hard-coded 32 did before - confirm for the target.
   */
  static long token_spec_number(char* string, int length, int b)
  {
      unsigned int bn[BN_SIZE];
      int shift;
      int i, j;

      /* every digit contributes `shift` bits */
      switch (b)
      {
      case 16: shift = 4; break;
      case 8:  shift = 3; break;
      default: shift = 1; break;
      }

      for (j = 0; j < BN_SIZE; j++) bn[j] = 0;

      if (!string) return 0;

      for (i = 0; i < length; i++)
      {
          int c = string[i];
          int digit;
          unsigned int carry;

          if (c >= '0' && c <= '9')      digit = c - '0';
          else if (c >= 'a' && c <= 'f') digit = c - 'a' + 10;
          else if (c >= 'A' && c <= 'F') digit = c - 'A' + 10;
          else break;                     /* not a digit in any supported radix */

          if (digit >= b) break;          /* digit too large for this radix */

          /* shift the whole accumulator left and bring the new digit in at the bottom */
          carry = (unsigned int)digit;
          for (j = 0; j < BN_SIZE; j++)
          {
              unsigned int v = bn[j];

              bn[j] = (v << shift) | carry;
              carry = v >> (32 - shift);
          }
      }

      return (long)bn[0];
  }
  519. }