finsh_token.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562
  1. #include <finsh.h>
  2. #include "finsh_token.h"
  3. #include "finsh_error.h"
  4. #define is_digit(ch) ((ch) >= '0' && (ch) <= '9')
  5. #define is_separator(ch) !(((ch) >= 'a' && (ch) <= 'z') \
  6. || ((ch) >= 'A' && (ch) <= 'Z') || ((ch) >= '0' && (ch) <= '9') || ((ch) == '_'))
  7. #define is_eof(self) (self)->eof
  8. struct name_table
  9. {
  10. char* name;
  11. enum finsh_token_type type;
  12. };
  13. /* keyword */
  14. static struct name_table finsh_name_table[] =
  15. {
  16. {"void", finsh_token_type_void},
  17. {"char", finsh_token_type_char},
  18. {"short", finsh_token_type_short},
  19. {"int", finsh_token_type_int},
  20. {"long", finsh_token_type_long},
  21. {"unsigned", finsh_token_type_unsigned},
  22. {"NULL", finsh_token_type_value_null},
  23. {"null", finsh_token_type_value_null}
  24. };
  25. static char token_next_char(struct finsh_token* self);
  26. static void token_prev_char(struct finsh_token* self);
  27. static long token_spec_number(char* string, int length, int b);
  28. static void token_run(struct finsh_token* self);
  29. static int token_match_name(struct finsh_token* self, const char* str);
  30. static void token_proc_number(struct finsh_token* self);
  31. static u_char* token_proc_string(struct finsh_token* self);
  32. static void token_trim_space(struct finsh_token* self);
  33. static char token_proc_char(struct finsh_token* self);
  34. static int token_proc_escape(struct finsh_token* self);
  35. void finsh_token_init(struct finsh_token* self, u_char* line)
  36. {
  37. memset(self, 0, sizeof(struct finsh_token));
  38. self->line = line;
  39. }
  40. enum finsh_token_type finsh_token_token(struct finsh_token* self)
  41. {
  42. if ( self->replay ) self->replay = 0;
  43. else token_run(self);
  44. return (enum finsh_token_type)self->current_token;
  45. }
  46. void finsh_token_get_token(struct finsh_token* self, u_char* token)
  47. {
  48. strncpy((char*)token, (char*)self->string, FINSH_NAME_MAX);
  49. }
  50. int token_get_string(struct finsh_token* self, u_char* str)
  51. {
  52. unsigned char *p=str;
  53. char ch;
  54. ch = token_next_char(self);
  55. if (is_eof(self)) return -1;
  56. str[0] = '\0';
  57. if ( is_digit(ch) )/*the first character of identifier is not a digit.*/
  58. {
  59. token_prev_char(self);
  60. return -1;
  61. }
  62. while (!is_separator(ch) && !is_eof(self))
  63. {
  64. *p++ = ch;
  65. ch = token_next_char(self);
  66. }
  67. self->eof = 0;
  68. token_prev_char(self);
  69. *p = '\0';
  70. return 0;
  71. }
  72. /*
  73. get next character.
  74. */
  75. static char token_next_char(struct finsh_token* self)
  76. {
  77. if (self->eof) return '\0';
  78. if (self->position == (int)strlen((char*)self->line) || self->line[self->position] =='\n')
  79. {
  80. self->eof = 1;
  81. self->position = 0;
  82. return '\0';
  83. }
  84. return self->line[self->position++];
  85. }
  86. static void token_prev_char(struct finsh_token* self)
  87. {
  88. if ( self->eof ) return;
  89. if ( self->position == 0 ) return;
  90. else self->position--;
  91. }
  92. static void token_run(struct finsh_token* self)
  93. {
  94. char ch;
  95. token_trim_space(self); /* first trim space and tab. */
  96. token_get_string(self, &(self->string[0]));
  97. if ( is_eof(self) ) /*if it is eof, break;*/
  98. {
  99. self->current_token = finsh_token_type_eof;
  100. return ;
  101. }
  102. if (self->string[0] != '\0') /*It is a key word or a identifier.*/
  103. {
  104. if ( !token_match_name(self, (char*)self->string) )
  105. {
  106. self->current_token = finsh_token_type_identifier;
  107. }
  108. return;
  109. }
  110. else/*It is a operator character.*/
  111. {
  112. ch = token_next_char(self);
  113. switch ( ch )
  114. {
  115. case '(':
  116. self->current_token = finsh_token_type_left_paren;
  117. break;
  118. case ')':
  119. self->current_token = finsh_token_type_right_paren;
  120. break;
  121. case ',':
  122. self->current_token = finsh_token_type_comma;
  123. break;
  124. case ';':
  125. self->current_token = finsh_token_type_semicolon;
  126. break;
  127. case '&':
  128. self->current_token = finsh_token_type_and;
  129. break;
  130. case '*':
  131. self->current_token = finsh_token_type_mul;
  132. break;
  133. case '+':
  134. ch = token_next_char(self);
  135. if ( ch == '+' )
  136. {
  137. self->current_token = finsh_token_type_inc;
  138. }
  139. else
  140. {
  141. token_prev_char(self);
  142. self->current_token = finsh_token_type_add;
  143. }
  144. break;
  145. case '-':
  146. ch = token_next_char(self);
  147. if ( ch == '-' )
  148. {
  149. self->current_token = finsh_token_type_dec;
  150. }
  151. else
  152. {
  153. token_prev_char(self);
  154. self->current_token = finsh_token_type_sub;
  155. }
  156. break;
  157. case '/':
  158. self->current_token = finsh_token_type_div;
  159. break;
  160. case '<':
  161. ch = token_next_char(self);
  162. if ( ch == '<' )
  163. {
  164. self->current_token = finsh_token_type_shl;
  165. }
  166. else
  167. {
  168. token_prev_char(self);
  169. self->current_token = finsh_token_type_bad;
  170. }
  171. break;
  172. case '>':
  173. ch = token_next_char(self);
  174. if ( ch == '>' )
  175. {
  176. self->current_token = finsh_token_type_shr;
  177. }
  178. else
  179. {
  180. token_prev_char(self);
  181. self->current_token = finsh_token_type_bad;
  182. }
  183. break;
  184. case '|':
  185. self->current_token = finsh_token_type_or;
  186. break;
  187. case '%':
  188. self->current_token = finsh_token_type_mod;
  189. break;
  190. case '~':
  191. self->current_token = finsh_token_type_bitwise;
  192. break;
  193. case '^':
  194. self->current_token = finsh_token_type_xor;
  195. break;
  196. case '=':
  197. self->current_token = finsh_token_type_assign;
  198. break;
  199. case '\'':
  200. self->value.char_value = token_proc_char(self);
  201. self->current_token = finsh_token_type_value_char;
  202. break;
  203. case '"':
  204. token_proc_string(self);
  205. self->current_token = finsh_token_type_value_string;
  206. break;
  207. default:
  208. if ( is_digit(ch) )
  209. {
  210. token_prev_char(self);
  211. token_proc_number(self);
  212. break;
  213. }
  214. finsh_error_set(FINSH_ERROR_UNKNOWN_TOKEN);
  215. self->current_token = finsh_token_type_bad;
  216. break;
  217. }
  218. }
  219. }
  220. static int token_match_name(struct finsh_token* self, const char* str)
  221. {
  222. int i;
  223. for (i = 0; i < sizeof(finsh_name_table)/sizeof(struct name_table); i++)
  224. {
  225. if ( strcmp(finsh_name_table[i].name, str)==0 )
  226. {
  227. self->current_token = finsh_name_table[i].type;
  228. return 1;
  229. }
  230. }
  231. return 0;
  232. }
  233. static void token_trim_space(struct finsh_token* self)
  234. {
  235. char ch;
  236. while ( (ch = token_next_char(self)) ==' ' || ch == '\t');
  237. token_prev_char(self);
  238. }
  239. static char token_proc_char(struct finsh_token* self)
  240. {
  241. char ch;
  242. char buf[4], *p;
  243. p = buf;
  244. ch = token_next_char(self);
  245. if ( ch == '\\' )
  246. {
  247. ch = token_next_char(self);
  248. switch ( ch )
  249. {
  250. case 'n': ch = '\n'; break;
  251. case 't': ch = '\t'; break;
  252. case 'v': ch = '\v'; break;
  253. case 'b': ch = '\b'; break;
  254. case 'r': ch = '\r'; break;
  255. case '\\': ch = '\\'; break;
  256. case '\'': ch = '\''; break;
  257. default :
  258. while ( is_digit(ch) )/*for '\113' char*/
  259. {
  260. ch = token_next_char(self);
  261. *p++ = ch;
  262. }
  263. token_prev_char(self);
  264. *p = '\0';
  265. ch = atoi(p);
  266. break;
  267. }
  268. }
  269. if ( token_next_char(self) != '\'' )
  270. {
  271. token_prev_char(self);
  272. finsh_error_set(FINSH_ERROR_EXPECT_CHAR);
  273. return ch;
  274. }
  275. return ch;
  276. }
  277. static u_char* token_proc_string(struct finsh_token* self)
  278. {
  279. u_char* p;
  280. for ( p = &self->string[0]; p - &(self->string[0]) < FINSH_STRING_MAX; )
  281. {
  282. char ch = token_next_char(self);
  283. if ( is_eof(self) )
  284. {
  285. finsh_error_set(FINSH_ERROR_UNEXPECT_END);
  286. return NULL;;
  287. }
  288. if ( ch == '\\' )
  289. {
  290. ch = token_proc_escape(self);
  291. }
  292. else if ( ch == '"' )/*end of string.*/
  293. {
  294. *p = '\0';
  295. return self->string;
  296. }
  297. *p++ = ch;
  298. }
  299. return NULL;
  300. }
  301. static int token_proc_escape(struct finsh_token* self)
  302. {
  303. char ch;
  304. int result=0;
  305. ch = token_next_char(self);
  306. switch (ch)
  307. {
  308. case 'n':
  309. result = '\n';
  310. break;
  311. case 't':
  312. result = '\t';
  313. break;
  314. case 'v':
  315. result = '\v';
  316. break;
  317. case 'b':
  318. result = '\b';
  319. break;
  320. case 'r':
  321. result = '\r';
  322. break;
  323. case 'f':
  324. result = '\f';
  325. break;
  326. case 'a':
  327. result = '\007';
  328. break;
  329. case 'x':
  330. result = 0;
  331. ch = token_next_char(self);
  332. while ( (ch - '0')<16u )
  333. {
  334. result = result*16 + ch - '0';
  335. ch = token_next_char(self);
  336. }
  337. token_prev_char(self);
  338. break;
  339. default:
  340. if ( (ch - '0') < 8u)
  341. {
  342. result = 0;
  343. while ( (ch - '0') < 8u )
  344. {
  345. result = result*8 + ch - '0';
  346. ch = token_next_char(self);
  347. }
  348. token_prev_char(self);
  349. }
  350. break;
  351. }
  352. return result;
  353. }
  354. /*
  355. (0|0x|0X|0b|0B)number+(l|L)
  356. */
  357. static void token_proc_number(struct finsh_token* self)
  358. {
  359. char ch;
  360. int b;
  361. char *p, buf[128];
  362. long value;
  363. value = 0;
  364. p = buf;
  365. b = 10;
  366. ch = token_next_char(self);
  367. if ( ch == '0' )
  368. {
  369. ch = token_next_char(self);
  370. if ( ch == 'x' || ch == 'X' )/*it's a hex number*/
  371. {
  372. b = 16;
  373. ch = token_next_char(self);
  374. while ( is_digit(ch) || isalpha(ch) )
  375. {
  376. *p++ = ch;
  377. ch = token_next_char(self);
  378. }
  379. *p = '\0';
  380. }
  381. else if ( ch == 'b' || ch == 'B' )
  382. {
  383. b = 2;
  384. ch = token_next_char(self);
  385. while ( (ch=='0')||(ch=='1') )
  386. {
  387. *p++ = ch;
  388. ch = token_next_char(self);
  389. }
  390. *p = '\0';
  391. }
  392. else
  393. {
  394. b = 8;
  395. while ( is_digit(ch) )
  396. {
  397. *p++ = ch;
  398. ch = token_next_char(self);
  399. }
  400. *p = '\0';
  401. }
  402. self->value.int_value = token_spec_number(buf, strlen(buf), b);
  403. self->current_token = finsh_token_type_value_int;
  404. }
  405. else
  406. {
  407. while ( is_digit(ch) )
  408. {
  409. value = value*10 + ( ch - '0' );
  410. ch = token_next_char(self);
  411. }
  412. self->value.int_value = value;
  413. self->current_token = finsh_token_type_value_int;
  414. }
  415. switch ( ch )
  416. {
  417. case 'l':
  418. case 'L':
  419. self->current_token = finsh_token_type_value_long;
  420. break;
  421. default:
  422. token_prev_char(self);
  423. break;
  424. }
  425. }
  426. /*use 64 bit number*/
  427. #define BN_SIZE 2
  428. static long token_spec_number(char* string, int length, int b)
  429. {
  430. char* p;
  431. int t;
  432. int i, j, shift=1;
  433. unsigned int bn[BN_SIZE], v;
  434. long d;
  435. p = string;
  436. i = 0;
  437. switch ( b )
  438. {
  439. case 16: shift = 4;
  440. break;
  441. case 8: shift = 3;
  442. break;
  443. case 2: shift = 1;
  444. break;
  445. default: break;
  446. }
  447. for ( j=0; j<BN_SIZE ; j++) bn[j] = 0;
  448. while ( i<length )
  449. {
  450. t = *p++;
  451. if ( t>='a' && t <='f' )
  452. {
  453. t = t - 'a' +10;
  454. }
  455. else if ( t >='A' && t <='F' )
  456. {
  457. t = t - 'A' +10;
  458. }
  459. else t = t - '0';
  460. for ( j=0; j<BN_SIZE ; j++)
  461. {
  462. v = bn[j];
  463. bn[j] = (v<<shift) | t;
  464. t = v >> (32 - shift);
  465. }
  466. i++;
  467. }
  468. d = (long)bn[0];
  469. return d;
  470. }