?login_element?

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download

  1. /*
  2. ** $Id: llex.c $
  3. ** Lexical Analyzer
  4. ** See Copyright Notice in lua.h
  5. */
  6.  
  7. #define llex_c
  8. #define LUA_CORE
  9.  
  10. #include "lprefix.h"
  11.  
  12.  
  13. #include <locale.h>
  14. #include <string.h>
  15.  
  16. #include "lua.h"
  17.  
  18. #include "lctype.h"
  19. #include "ldebug.h"
  20. #include "ldo.h"
  21. #include "lgc.h"
  22. #include "llex.h"
  23. #include "lobject.h"
  24. #include "lparser.h"
  25. #include "lstate.h"
  26. #include "lstring.h"
  27. #include "ltable.h"
  28. #include "lzio.h"
  29.  
  30.  
  31.  
/* Fetch the next character with 'zgetc' and store it in 'ls->current'. */
#define next(ls)        (ls->current = zgetc(ls->z))



/* True when the current character is a line-break character ('\n' or '\r'). */
#define currIsNewline(ls)       (ls->current == '\n' || ls->current == '\r')
  37.  
  38.  
/* ORDER RESERVED */
/*
** Textual form of every multi-character token. 'luaX_token2str' indexes
** this table with 'token - FIRST_RESERVED', and 'luaX_init' registers the
** first NUM_RESERVED entries as reserved words, so the order of the
** entries matters.
*/
static const char *const luaX_tokens [] = {
    "and", "break", "do", "else", "elseif",
    "end", "false", "for", "function", "goto", "if",
    "in", "local", "nil", "not", "or", "repeat",
    "return", "then", "true", "until", "while",
    "//", "..", "...", "==", ">=", "<=", "~=",
    "<<", ">>", "::", "<eof>",
    "<number>", "<integer>", "<name>", "<string>"
};
  49.  
  50.  
/* Append the current character to the token buffer, then read the next one. */
#define save_and_next(ls) (save(ls, ls->current), next(ls))


/* Forward declaration: 'save' (below) reports buffer overflow through it. */
static l_noret lexerror (LexState *ls, const char *msg, int token);
  55.  
  56.  
  57. static void save (LexState *ls, int c) {
  58.   Mbuffer *b = ls->buff;
  59.   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
  60.     size_t newsize;
  61.     if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
  62.       lexerror(ls, "lexical element too long", 0);
  63.     newsize = luaZ_sizebuffer(b) * 2;
  64.     luaZ_resizebuffer(ls->L, b, newsize);
  65.   }
  66.   b->buffer[luaZ_bufflen(b)++] = cast_char(c);
  67. }
  68.  
  69.  
  70. void luaX_init (lua_State *L) {
  71.   int i;
  72.   TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
  73.   luaC_fix(L, obj2gco(e));  /* never collect this name */
  74.   for (i=0; i<NUM_RESERVED; i++) {
  75.     TString *ts = luaS_new(L, luaX_tokens[i]);
  76.     luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
  77.     ts->extra = cast_byte(i+1);  /* reserved word */
  78.   }
  79. }
  80.  
  81.  
  82. const char *luaX_token2str (LexState *ls, int token) {
  83.   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
  84.     if (lisprint(token))
  85.       return luaO_pushfstring(ls->L, "'%c'", token);
  86.     else  /* control character */
  87.       return luaO_pushfstring(ls->L, "'<\\%d>'", token);
  88.   }
  89.   else {
  90.     const char *s = luaX_tokens[token - FIRST_RESERVED];
  91.     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
  92.       return luaO_pushfstring(ls->L, "'%s'", s);
  93.     else  /* names, strings, and numerals */
  94.       return s;
  95.   }
  96. }
  97.  
  98.  
  99. static const char *txtToken (LexState *ls, int token) {
  100.   switch (token) {
  101.     case TK_NAME: case TK_STRING:
  102.     case TK_FLT: case TK_INT:
  103.       save(ls, '\0');
  104.       return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
  105.     default:
  106.       return luaX_token2str(ls, token);
  107.   }
  108. }
  109.  
  110.  
  111. static l_noret lexerror (LexState *ls, const char *msg, int token) {
  112.   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
  113.   if (token)
  114.     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
  115.   luaD_throw(ls->L, LUA_ERRSYNTAX);
  116. }
  117.  
  118.  
  119. l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
  120.   lexerror(ls, msg, ls->t.token);
  121. }
  122.  
  123.  
  124. /*
  125. ** Creates a new string and anchors it in scanner's table so that it
  126. ** will not be collected until the end of the compilation; by that time
  127. ** it should be anchored somewhere. It also internalizes long strings,
  128. ** ensuring there is only one copy of each unique string.  The table
  129. ** here is used as a set: the string enters as the key, while its value
  130. ** is irrelevant. We use the string itself as the value only because it
  131. ** is a TValue readly available. Later, the code generation can change
  132. ** this value.
  133. */
  134. TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
  135.   lua_State *L = ls->L;
  136.   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
  137.   const TValue *o = luaH_getstr(ls->h, ts);
  138.   if (!ttisnil(o))  /* string already present? */
  139.     ts = keystrval(nodefromval(o));  /* get saved copy */
  140.   else {  /* not in use yet */
  141.     TValue *stv = s2v(L->top++);  /* reserve stack space for string */
  142.     setsvalue(L, stv, ts);  /* temporarily anchor the string */
  143.     luaH_finishset(L, ls->h, stv, o, stv);  /* t[string] = string */
  144.     /* table is not a metatable, so it does not need to invalidate cache */
  145.     luaC_checkGC(L);
  146.     L->top--;  /* remove string from stack */
  147.   }
  148.   return ts;
  149. }
  150.  
  151.  
  152. /*
  153. ** increment line number and skips newline sequence (any of
  154. ** \n, \r, \n\r, or \r\n)
  155. */
  156. static void inclinenumber (LexState *ls) {
  157.   int old = ls->current;
  158.   lua_assert(currIsNewline(ls));
  159.   next(ls);  /* skip '\n' or '\r' */
  160.   if (currIsNewline(ls) && ls->current != old)
  161.     next(ls);  /* skip '\n\r' or '\r\n' */
  162.   if (++ls->linenumber >= MAX_INT)
  163.     lexerror(ls, "chunk has too many lines", 0);
  164. }
  165.  
  166.  
  167. void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
  168.                     int firstchar) {
  169.   ls->t.token = 0;
  170.   ls->L = L;
  171.   ls->current = firstchar;
  172.   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  173.   ls->z = z;
  174.   ls->fs = NULL;
  175.   ls->linenumber = 1;
  176.   ls->lastline = 1;
  177.   ls->source = source;
  178.   ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
  179.   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
  180. }
  181.  
  182.  
  183.  
  184. /*
  185. ** =======================================================
  186. ** LEXICAL ANALYZER
  187. ** =======================================================
  188. */
  189.  
  190.  
  191. static int check_next1 (LexState *ls, int c) {
  192.   if (ls->current == c) {
  193.     next(ls);
  194.     return 1;
  195.   }
  196.   else return 0;
  197. }
  198.  
  199.  
  200. /*
  201. ** Check whether current char is in set 'set' (with two chars) and
  202. ** saves it
  203. */
  204. static int check_next2 (LexState *ls, const char *set) {
  205.   lua_assert(set[2] == '\0');
  206.   if (ls->current == set[0] || ls->current == set[1]) {
  207.     save_and_next(ls);
  208.     return 1;
  209.   }
  210.   else return 0;
  211. }
  212.  
  213.  
  214. /* LUA_NUMBER */
  215. /*
  216. ** This function is quite liberal in what it accepts, as 'luaO_str2num'
  217. ** will reject ill-formed numerals. Roughly, it accepts the following
  218. ** pattern:
  219. **
  220. **   %d(%x|%.|([Ee][+-]?))* | 0[Xx](%x|%.|([Pp][+-]?))*
  221. **
  222. ** The only tricky part is to accept [+-] only after a valid exponent
  223. ** mark, to avoid reading '3-4' or '0xe+1' as a single number.
  224. **
  225. ** The caller might have already read an initial dot.
  226. */
  227. static int read_numeral (LexState *ls, SemInfo *seminfo) {
  228.   TValue obj;
  229.   const char *expo = "Ee";
  230.   int first = ls->current;
  231.   lua_assert(lisdigit(ls->current));
  232.   save_and_next(ls);
  233.   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
  234.     expo = "Pp";
  235.   for (;;) {
  236.     if (check_next2(ls, expo))  /* exponent mark? */
  237.       check_next2(ls, "-+");  /* optional exponent sign */
  238.     else if (lisxdigit(ls->current) || ls->current == '.')  /* '%x|%.' */
  239.       save_and_next(ls);
  240.     else break;
  241.   }
  242.   if (lislalpha(ls->current))  /* is numeral touching a letter? */
  243.     save_and_next(ls);  /* force an error */
  244.   save(ls, '\0');
  245.   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
  246.     lexerror(ls, "malformed number", TK_FLT);
  247.   if (ttisinteger(&obj)) {
  248.     seminfo->i = ivalue(&obj);
  249.     return TK_INT;
  250.   }
  251.   else {
  252.     lua_assert(ttisfloat(&obj));
  253.     seminfo->r = fltvalue(&obj);
  254.     return TK_FLT;
  255.   }
  256. }
  257.  
  258.  
  259. /*
  260. ** read a sequence '[=*[' or ']=*]', leaving the last bracket. If
  261. ** sequence is well formed, return its number of '='s + 2; otherwise,
  262. ** return 1 if it is a single bracket (no '='s and no 2nd bracket);
  263. ** otherwise (an unfinished '[==...') return 0.
  264. */
  265. static size_t skip_sep (LexState *ls) {
  266.   size_t count = 0;
  267.   int s = ls->current;
  268.   lua_assert(s == '[' || s == ']');
  269.   save_and_next(ls);
  270.   while (ls->current == '=') {
  271.     save_and_next(ls);
  272.     count++;
  273.   }
  274.   return (ls->current == s) ? count + 2
  275.          : (count == 0) ? 1
  276.          : 0;
  277. }
  278.  
  279.  
  280. static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
  281.   int line = ls->linenumber;  /* initial line (for error message) */
  282.   save_and_next(ls);  /* skip 2nd '[' */
  283.   if (currIsNewline(ls))  /* string starts with a newline? */
  284.     inclinenumber(ls);  /* skip it */
  285.   for (;;) {
  286.     switch (ls->current) {
  287.       case EOZ: {  /* error */
  288.         const char *what = (seminfo ? "string" : "comment");
  289.         const char *msg = luaO_pushfstring(ls->L,
  290.                      "unfinished long %s (starting at line %d)", what, line);
  291.         lexerror(ls, msg, TK_EOS);
  292.         break;  /* to avoid warnings */
  293.       }
  294.       case ']': {
  295.         if (skip_sep(ls) == sep) {
  296.           save_and_next(ls);  /* skip 2nd ']' */
  297.           goto endloop;
  298.         }
  299.         break;
  300.       }
  301.       case '\n': case '\r': {
  302.         save(ls, '\n');
  303.         inclinenumber(ls);
  304.         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
  305.         break;
  306.       }
  307.       default: {
  308.         if (seminfo) save_and_next(ls);
  309.         else next(ls);
  310.       }
  311.     }
  312.   } endloop:
  313.   if (seminfo)
  314.     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
  315.                                      luaZ_bufflen(ls->buff) - 2 * sep);
  316. }
  317.  
  318.  
  319. static void esccheck (LexState *ls, int c, const char *msg) {
  320.   if (!c) {
  321.     if (ls->current != EOZ)
  322.       save_and_next(ls);  /* add current to buffer for error message */
  323.     lexerror(ls, msg, TK_STRING);
  324.   }
  325. }
  326.  
  327.  
  328. static int gethexa (LexState *ls) {
  329.   save_and_next(ls);
  330.   esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
  331.   return luaO_hexavalue(ls->current);
  332. }
  333.  
  334.  
  335. static int readhexaesc (LexState *ls) {
  336.   int r = gethexa(ls);
  337.   r = (r << 4) + gethexa(ls);
  338.   luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
  339.   return r;
  340. }
  341.  
  342.  
  343. static unsigned long readutf8esc (LexState *ls) {
  344.   unsigned long r;
  345.   int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
  346.   save_and_next(ls);  /* skip 'u' */
  347.   esccheck(ls, ls->current == '{', "missing '{'");
  348.   r = gethexa(ls);  /* must have at least one digit */
  349.   while (cast_void(save_and_next(ls)), lisxdigit(ls->current)) {
  350.     i++;
  351.     esccheck(ls, r <= (0x7FFFFFFFu >> 4), "UTF-8 value too large");
  352.     r = (r << 4) + luaO_hexavalue(ls->current);
  353.   }
  354.   esccheck(ls, ls->current == '}', "missing '}'");
  355.   next(ls);  /* skip '}' */
  356.   luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */
  357.   return r;
  358. }
  359.  
  360.  
  361. static void utf8esc (LexState *ls) {
  362.   char buff[UTF8BUFFSZ];
  363.   int n = luaO_utf8esc(buff, readutf8esc(ls));
  364.   for (; n > 0; n--)  /* add 'buff' to string */
  365.     save(ls, buff[UTF8BUFFSZ - n]);
  366. }
  367.  
  368.  
  369. static int readdecesc (LexState *ls) {
  370.   int i;
  371.   int r = 0;  /* result accumulator */
  372.   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
  373.     r = 10*r + ls->current - '0';
  374.     save_and_next(ls);
  375.   }
  376.   esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
  377.   luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */
  378.   return r;
  379. }
  380.  
  381.  
/*
** Read a short literal string delimited by 'del' (the current
** character on entry), translating escape sequences as it goes. Both
** delimiters are kept in the buffer while reading (for error messages)
** and excluded from the final string, which is stored in 'seminfo->ts'.
** Raises "unfinished string" on end of input or on an unescaped line
** break.
**
** Escape handling ends in one of three labels, which fall through in
** this order:
**   read_save: skip the character that selected the escape;
**   only_save: remove the saved '\\' from the buffer and save 'c';
**   no_save:   nothing else to do for this escape.
*/
static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  save_and_next(ls);  /* keep delimiter (for error messages) */
  while (ls->current != del) {
    switch (ls->current) {
      case EOZ:
        lexerror(ls, "unfinished string", TK_EOS);
        break;  /* to avoid warnings */
      case '\n':
      case '\r':
        lexerror(ls, "unfinished string", TK_STRING);
        break;  /* to avoid warnings */
      case '\\': {  /* escape sequences */
        int c;  /* final character to be saved */
        save_and_next(ls);  /* keep '\\' for error messages */
        switch (ls->current) {
          case 'a': c = '\a'; goto read_save;
          case 'b': c = '\b'; goto read_save;
          case 'f': c = '\f'; goto read_save;
          case 'n': c = '\n'; goto read_save;
          case 'r': c = '\r'; goto read_save;
          case 't': c = '\t'; goto read_save;
          case 'v': c = '\v'; goto read_save;
          case 'x': c = readhexaesc(ls); goto read_save;
          case 'u': utf8esc(ls);  goto no_save;
          case '\n': case '\r':  /* escaped line break is saved as '\n' */
            inclinenumber(ls); c = '\n'; goto only_save;
          case '\\': case '\"': case '\'':
            c = ls->current; goto read_save;
          case EOZ: goto no_save;  /* will raise an error next loop */
          case 'z': {  /* zap following span of spaces */
            luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
            next(ls);  /* skip the 'z' */
            while (lisspace(ls->current)) {
              if (currIsNewline(ls)) inclinenumber(ls);  /* keep line count */
              else next(ls);
            }
            goto no_save;
          }
          default: {
            /* anything else must start a decimal escape */
            esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
            c = readdecesc(ls);  /* digital escape '\ddd' */
            goto only_save;
          }
        }
       read_save:
         next(ls);
         /* go through */
       only_save:
         luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
         save(ls, c);
         /* go through */
       no_save: break;
      }
      default:
        save_and_next(ls);
    }
  }
  save_and_next(ls);  /* skip delimiter */
  /* skip the opening delimiter (+ 1) and drop both delimiters (- 2) */
  seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
                                   luaZ_bufflen(ls->buff) - 2);
}
  443.  
  444.  
  445. static int llex (LexState *ls, SemInfo *seminfo) {
  446.   luaZ_resetbuffer(ls->buff);
  447.   for (;;) {
  448.     switch (ls->current) {
  449.       case '\n': case '\r': {  /* line breaks */
  450.         inclinenumber(ls);
  451.         break;
  452.       }
  453.       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
  454.         next(ls);
  455.         break;
  456.       }
  457.       case '-': {  /* '-' or '--' (comment) */
  458.         next(ls);
  459.         if (ls->current != '-') return '-';
  460.         /* else is a comment */
  461.         next(ls);
  462.         if (ls->current == '[') {  /* long comment? */
  463.           size_t sep = skip_sep(ls);
  464.           luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
  465.           if (sep >= 2) {
  466.             read_long_string(ls, NULL, sep);  /* skip long comment */
  467.             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
  468.             break;
  469.           }
  470.         }
  471.         /* else short comment */
  472.         while (!currIsNewline(ls) && ls->current != EOZ)
  473.           next(ls);  /* skip until end of line (or end of file) */
  474.         break;
  475.       }
  476.       case '[': {  /* long string or simply '[' */
  477.         size_t sep = skip_sep(ls);
  478.         if (sep >= 2) {
  479.           read_long_string(ls, seminfo, sep);
  480.           return TK_STRING;
  481.         }
  482.         else if (sep == 0)  /* '[=...' missing second bracket? */
  483.           lexerror(ls, "invalid long string delimiter", TK_STRING);
  484.         return '[';
  485.       }
  486.       case '=': {
  487.         next(ls);
  488.         if (check_next1(ls, '=')) return TK_EQ;  /* '==' */
  489.         else return '=';
  490.       }
  491.       case '<': {
  492.         next(ls);
  493.         if (check_next1(ls, '=')) return TK_LE;  /* '<=' */
  494.         else if (check_next1(ls, '<')) return TK_SHL;  /* '<<' */
  495.         else return '<';
  496.       }
  497.       case '>': {
  498.         next(ls);
  499.         if (check_next1(ls, '=')) return TK_GE;  /* '>=' */
  500.         else if (check_next1(ls, '>')) return TK_SHR;  /* '>>' */
  501.         else return '>';
  502.       }
  503.       case '/': {
  504.         next(ls);
  505.         if (check_next1(ls, '/')) return TK_IDIV;  /* '//' */
  506.         else return '/';
  507.       }
  508.       case '~': {
  509.         next(ls);
  510.         if (check_next1(ls, '=')) return TK_NE;  /* '~=' */
  511.         else return '~';
  512.       }
  513.       case ':': {
  514.         next(ls);
  515.         if (check_next1(ls, ':')) return TK_DBCOLON;  /* '::' */
  516.         else return ':';
  517.       }
  518.       case '"': case '\'': {  /* short literal strings */
  519.         read_string(ls, ls->current, seminfo);
  520.         return TK_STRING;
  521.       }
  522.       case '.': {  /* '.', '..', '...', or number */
  523.         save_and_next(ls);
  524.         if (check_next1(ls, '.')) {
  525.           if (check_next1(ls, '.'))
  526.             return TK_DOTS;   /* '...' */
  527.           else return TK_CONCAT;   /* '..' */
  528.         }
  529.         else if (!lisdigit(ls->current)) return '.';
  530.         else return read_numeral(ls, seminfo);
  531.       }
  532.       case '0': case '1': case '2': case '3': case '4':
  533.       case '5': case '6': case '7': case '8': case '9': {
  534.         return read_numeral(ls, seminfo);
  535.       }
  536.       case EOZ: {
  537.         return TK_EOS;
  538.       }
  539.       default: {
  540.         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
  541.           TString *ts;
  542.           do {
  543.             save_and_next(ls);
  544.           } while (lislalnum(ls->current));
  545.           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
  546.                                   luaZ_bufflen(ls->buff));
  547.           seminfo->ts = ts;
  548.           if (isreserved(ts))  /* reserved word? */
  549.             return ts->extra - 1 + FIRST_RESERVED;
  550.           else {
  551.             return TK_NAME;
  552.           }
  553.         }
  554.         else {  /* single-char tokens ('+', '*', '%', '{', '}', ...) */
  555.           int c = ls->current;
  556.           next(ls);
  557.           return c;
  558.         }
  559.       }
  560.     }
  561.   }
  562. }
  563.  
  564.  
  565. void luaX_next (LexState *ls) {
  566.   ls->lastline = ls->linenumber;
  567.   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
  568.     ls->t = ls->lookahead;  /* use this one */
  569.     ls->lookahead.token = TK_EOS;  /* and discharge it */
  570.   }
  571.   else
  572.     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
  573. }
  574.  
  575.  
  576. int luaX_lookahead (LexState *ls) {
  577.   lua_assert(ls->lookahead.token == TK_EOS);
  578.   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
  579.   return ls->lookahead.token;
  580. }
  581.  
  582.