?login_element?

Subversion Repositories NedoOS

Rev

Blame | Last modification | View Log | Download

  1. /*
  2. ** $Id: llex.c,v 2.96.1.1 2017/04/19 17:20:42 roberto Exp $
  3. ** Lexical Analyzer
  4. ** See Copyright Notice in lua.h
  5. */
  6.  
  7. #define llex_c
  8. #define LUA_CORE
  9.  
  10. #include "lprefix.h"
  11.  
  12.  
  13. #include <locale.h>
  14. #include <string.h>
  15.  
  16. #include "lua.h"
  17.  
  18. #include "lctype.h"
  19. #include "ldebug.h"
  20. #include "ldo.h"
  21. #include "lgc.h"
  22. #include "llex.h"
  23. #include "lobject.h"
  24. #include "lparser.h"
  25. #include "lstate.h"
  26. #include "lstring.h"
  27. #include "ltable.h"
  28. #include "lzio.h"
  29.  
  30.  
  31.  
  /*
  ** Advances the lexer by one character: fetches the next character from
  ** the input stream 'ls->z' with 'zgetc' and stores it in 'ls->current'.
  ** The macro parameter is parenthesized so that any pointer expression
  ** (not only a plain identifier) can be passed safely.
  */
  #define next(ls)	((ls)->current = zgetc((ls)->z))


  /* True when the current character starts a line break ('\n' or '\r'). */
  #define currIsNewline(ls)	\
	((ls)->current == '\n' || (ls)->current == '\r')
  37.  
  38.  
  /*
  ** Printable text of every multi-character token, indexed by
  ** 'token - FIRST_RESERVED' (see 'luaX_token2str').
  ** ORDER RESERVED
  */
  static const char *const luaX_tokens [] = {
      /* reserved words */
      "and", "break", "do", "else", "elseif", "end",
      "false", "for", "function", "goto", "if", "in",
      "local", "nil", "not", "or", "repeat", "return",
      "then", "true", "until", "while",
      /* multi-character operators */
      "//", "..", "...",
      "==", ">=", "<=", "~=",
      "<<", ">>", "::",
      /* end of stream */
      "<eof>",
      /* tokens that carry a value */
      "<number>", "<integer>", "<name>", "<string>"
  };
  49.  
  50.  
  51. #define save_and_next(ls) (save(ls, ls->current), next(ls))
  52.  
  53.  
  54. static l_noret lexerror (LexState *ls, const char *msg, int token);
  55.  
  56.  
  57. static void save (LexState *ls, int c) {
  58.   Mbuffer *b = ls->buff;
  59.   if (luaZ_bufflen(b) + 1 > luaZ_sizebuffer(b)) {
  60.     size_t newsize;
  61.     if (luaZ_sizebuffer(b) >= MAX_SIZE/2)
  62.       lexerror(ls, "lexical element too long", 0);
  63.     newsize = luaZ_sizebuffer(b) * 2;
  64.     luaZ_resizebuffer(ls->L, b, newsize);
  65.   }
  66.   b->buffer[luaZ_bufflen(b)++] = cast(char, c);
  67. }
  68.  
  69.  
  70. void luaX_init (lua_State *L) {
  71.   int i;
  72.   TString *e = luaS_newliteral(L, LUA_ENV);  /* create env name */
  73.   luaC_fix(L, obj2gco(e));  /* never collect this name */
  74.   for (i=0; i<NUM_RESERVED; i++) {
  75.     TString *ts = luaS_new(L, luaX_tokens[i]);
  76.     luaC_fix(L, obj2gco(ts));  /* reserved words are never collected */
  77.     ts->extra = cast_byte(i+1);  /* reserved word */
  78.   }
  79. }
  80.  
  81.  
  82. const char *luaX_token2str (LexState *ls, int token) {
  83.   if (token < FIRST_RESERVED) {  /* single-byte symbols? */
  84.     lua_assert(token == cast_uchar(token));
  85.     return luaO_pushfstring(ls->L, "'%c'", token);
  86.   }
  87.   else {
  88.     const char *s = luaX_tokens[token - FIRST_RESERVED];
  89.     if (token < TK_EOS)  /* fixed format (symbols and reserved words)? */
  90.       return luaO_pushfstring(ls->L, "'%s'", s);
  91.     else  /* names, strings, and numerals */
  92.       return s;
  93.   }
  94. }
  95.  
  96.  
  97. static const char *txtToken (LexState *ls, int token) {
  98.   switch (token) {
  99.     case TK_NAME: case TK_STRING:
  100.     case TK_FLT: case TK_INT:
  101.       save(ls, '\0');
  102.       return luaO_pushfstring(ls->L, "'%s'", luaZ_buffer(ls->buff));
  103.     default:
  104.       return luaX_token2str(ls, token);
  105.   }
  106. }
  107.  
  108.  
  109. static l_noret lexerror (LexState *ls, const char *msg, int token) {
  110.   msg = luaG_addinfo(ls->L, msg, ls->source, ls->linenumber);
  111.   if (token)
  112.     luaO_pushfstring(ls->L, "%s near %s", msg, txtToken(ls, token));
  113.   luaD_throw(ls->L, LUA_ERRSYNTAX);
  114. }
  115.  
  116.  
  117. l_noret luaX_syntaxerror (LexState *ls, const char *msg) {
  118.   lexerror(ls, msg, ls->t.token);
  119. }
  120.  
  121.  
  122. /*
  123. ** creates a new string and anchors it in scanner's table so that
  124. ** it will not be collected until the end of the compilation
  125. ** (by that time it should be anchored somewhere)
  126. */
  127. TString *luaX_newstring (LexState *ls, const char *str, size_t l) {
  128.   lua_State *L = ls->L;
  129.   TValue *o;  /* entry for 'str' */
  130.   TString *ts = luaS_newlstr(L, str, l);  /* create new string */
  131.   setsvalue2s(L, L->top++, ts);  /* temporarily anchor it in stack */
  132.   o = luaH_set(L, ls->h, L->top - 1);
  133.   if (ttisnil(o)) {  /* not in use yet? */
  134.     /* boolean value does not need GC barrier;
  135.        table has no metatable, so it does not need to invalidate cache */
  136.     setbvalue(o, 1);  /* t[string] = true */
  137.     luaC_checkGC(L);
  138.   }
  139.   else {  /* string already present */
  140.     ts = tsvalue(keyfromval(o));  /* re-use value previously stored */
  141.   }
  142.   L->top--;  /* remove string from stack */
  143.   return ts;
  144. }
  145.  
  146.  
  147. /*
  148. ** increment line number and skips newline sequence (any of
  149. ** \n, \r, \n\r, or \r\n)
  150. */
  151. static void inclinenumber (LexState *ls) {
  152.   int old = ls->current;
  153.   lua_assert(currIsNewline(ls));
  154.   next(ls);  /* skip '\n' or '\r' */
  155.   if (currIsNewline(ls) && ls->current != old)
  156.     next(ls);  /* skip '\n\r' or '\r\n' */
  157.   if (++ls->linenumber >= MAX_INT)
  158.     lexerror(ls, "chunk has too many lines", 0);
  159. }
  160.  
  161.  
  162. void luaX_setinput (lua_State *L, LexState *ls, ZIO *z, TString *source,
  163.                     int firstchar) {
  164.   ls->t.token = 0;
  165.   ls->L = L;
  166.   ls->current = firstchar;
  167.   ls->lookahead.token = TK_EOS;  /* no look-ahead token */
  168.   ls->z = z;
  169.   ls->fs = NULL;
  170.   ls->linenumber = 1;
  171.   ls->lastline = 1;
  172.   ls->source = source;
  173.   ls->envn = luaS_newliteral(L, LUA_ENV);  /* get env name */
  174.   luaZ_resizebuffer(ls->L, ls->buff, LUA_MINBUFFER);  /* initialize buffer */
  175. }
  176.  
  177.  
  178.  
  179. /*
  180. ** =======================================================
  181. ** LEXICAL ANALYZER
  182. ** =======================================================
  183. */
  184.  
  185.  
  186. static int check_next1 (LexState *ls, int c) {
  187.   if (ls->current == c) {
  188.     next(ls);
  189.     return 1;
  190.   }
  191.   else return 0;
  192. }
  193.  
  194.  
  195. /*
  196. ** Check whether current char is in set 'set' (with two chars) and
  197. ** saves it
  198. */
  199. static int check_next2 (LexState *ls, const char *set) {
  200.   lua_assert(set[2] == '\0');
  201.   if (ls->current == set[0] || ls->current == set[1]) {
  202.     save_and_next(ls);
  203.     return 1;
  204.   }
  205.   else return 0;
  206. }
  207.  
  208.  
  209. /* LUA_NUMBER */
  210. /*
  211. ** this function is quite liberal in what it accepts, as 'luaO_str2num'
  212. ** will reject ill-formed numerals.
  213. */
  214. static int read_numeral (LexState *ls, SemInfo *seminfo) {
  215.   TValue obj;
  216.   const char *expo = "Ee";
  217.   int first = ls->current;
  218.   lua_assert(lisdigit(ls->current));
  219.   save_and_next(ls);
  220.   if (first == '0' && check_next2(ls, "xX"))  /* hexadecimal? */
  221.     expo = "Pp";
  222.   for (;;) {
  223.     if (check_next2(ls, expo))  /* exponent part? */
  224.       check_next2(ls, "-+");  /* optional exponent sign */
  225.     if (lisxdigit(ls->current))
  226.       save_and_next(ls);
  227.     else if (ls->current == '.')
  228.       save_and_next(ls);
  229.     else break;
  230.   }
  231.   save(ls, '\0');
  232.   if (luaO_str2num(luaZ_buffer(ls->buff), &obj) == 0)  /* format error? */
  233.     lexerror(ls, "malformed number", TK_FLT);
  234.   if (ttisinteger(&obj)) {
  235.     seminfo->i = ivalue(&obj);
  236.     return TK_INT;
  237.   }
  238.   else {
  239.     lua_assert(ttisfloat(&obj));
  240.     seminfo->r = fltvalue(&obj);
  241.     return TK_FLT;
  242.   }
  243. }
  244.  
  245.  
  246. /*
  247. ** reads a sequence '[=*[' or ']=*]', leaving the last bracket.
  248. ** If sequence is well formed, return its number of '='s + 2; otherwise,
  249. ** return 1 if there is no '='s or 0 otherwise (an unfinished '[==...').
  250. */
  251. static size_t skip_sep (LexState *ls) {
  252.   size_t count = 0;
  253.   int s = ls->current;
  254.   lua_assert(s == '[' || s == ']');
  255.   save_and_next(ls);
  256.   while (ls->current == '=') {
  257.     save_and_next(ls);
  258.     count++;
  259.   }
  260.   return (ls->current == s) ? count + 2
  261.          : (count == 0) ? 1
  262.          : 0;
  263.  
  264. }
  265.  
  266.  
  267. static void read_long_string (LexState *ls, SemInfo *seminfo, size_t sep) {
  268.   int line = ls->linenumber;  /* initial line (for error message) */
  269.   save_and_next(ls);  /* skip 2nd '[' */
  270.   if (currIsNewline(ls))  /* string starts with a newline? */
  271.     inclinenumber(ls);  /* skip it */
  272.   for (;;) {
  273.     switch (ls->current) {
  274.       case EOZ: {  /* error */
  275.         const char *what = (seminfo ? "string" : "comment");
  276.         const char *msg = luaO_pushfstring(ls->L,
  277.                      "unfinished long %s (starting at line %d)", what, line);
  278.         lexerror(ls, msg, TK_EOS);
  279.         break;  /* to avoid warnings */
  280.       }
  281.       case ']': {
  282.         if (skip_sep(ls) == sep) {
  283.           save_and_next(ls);  /* skip 2nd ']' */
  284.           goto endloop;
  285.         }
  286.         break;
  287.       }
  288.       case '\n': case '\r': {
  289.         save(ls, '\n');
  290.         inclinenumber(ls);
  291.         if (!seminfo) luaZ_resetbuffer(ls->buff);  /* avoid wasting space */
  292.         break;
  293.       }
  294.       default: {
  295.         if (seminfo) save_and_next(ls);
  296.         else next(ls);
  297.       }
  298.     }
  299.   } endloop:
  300.   if (seminfo)
  301.     seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + sep,
  302.                                      luaZ_bufflen(ls->buff) - 2 * sep);
  303. }
  304.  
  305.  
  306. static void esccheck (LexState *ls, int c, const char *msg) {
  307.   if (!c) {
  308.     if (ls->current != EOZ)
  309.       save_and_next(ls);  /* add current to buffer for error message */
  310.     lexerror(ls, msg, TK_STRING);
  311.   }
  312. }
  313.  
  314.  
  315. static int gethexa (LexState *ls) {
  316.   save_and_next(ls);
  317.   esccheck (ls, lisxdigit(ls->current), "hexadecimal digit expected");
  318.   return luaO_hexavalue(ls->current);
  319. }
  320.  
  321.  
  322. static int readhexaesc (LexState *ls) {
  323.   int r = gethexa(ls);
  324.   r = (r << 4) + gethexa(ls);
  325.   luaZ_buffremove(ls->buff, 2);  /* remove saved chars from buffer */
  326.   return r;
  327. }
  328.  
  329.  
  330. static unsigned long readutf8esc (LexState *ls) {
  331.   unsigned long r;
  332.   int i = 4;  /* chars to be removed: '\', 'u', '{', and first digit */
  333.   save_and_next(ls);  /* skip 'u' */
  334.   esccheck(ls, ls->current == '{', "missing '{'");
  335.   r = gethexa(ls);  /* must have at least one digit */
  336.   while ((save_and_next(ls), lisxdigit(ls->current))) {
  337.     i++;
  338.     r = (r << 4) + luaO_hexavalue(ls->current);
  339.     esccheck(ls, r <= 0x10FFFF, "UTF-8 value too large");
  340.   }
  341.   esccheck(ls, ls->current == '}', "missing '}'");
  342.   next(ls);  /* skip '}' */
  343.   luaZ_buffremove(ls->buff, i);  /* remove saved chars from buffer */
  344.   return r;
  345. }
  346.  
  347.  
  348. static void utf8esc (LexState *ls) {
  349.   char buff[UTF8BUFFSZ];
  350.   int n = luaO_utf8esc(buff, readutf8esc(ls));
  351.   for (; n > 0; n--)  /* add 'buff' to string */
  352.     save(ls, buff[UTF8BUFFSZ - n]);
  353. }
  354.  
  355.  
  356. static int readdecesc (LexState *ls) {
  357.   int i;
  358.   int r = 0;  /* result accumulator */
  359.   for (i = 0; i < 3 && lisdigit(ls->current); i++) {  /* read up to 3 digits */
  360.     r = 10*r + ls->current - '0';
  361.     save_and_next(ls);
  362.   }
  363.   esccheck(ls, r <= UCHAR_MAX, "decimal escape too large");
  364.   luaZ_buffremove(ls->buff, i);  /* remove read digits from buffer */
  365.   return r;
  366. }
  367.  
  368.  
  369. static void read_string (LexState *ls, int del, SemInfo *seminfo) {
  370.   save_and_next(ls);  /* keep delimiter (for error messages) */
  371.   while (ls->current != del) {
  372.     switch (ls->current) {
  373.       case EOZ:
  374.         lexerror(ls, "unfinished string", TK_EOS);
  375.         break;  /* to avoid warnings */
  376.       case '\n':
  377.       case '\r':
  378.         lexerror(ls, "unfinished string", TK_STRING);
  379.         break;  /* to avoid warnings */
  380.       case '\\': {  /* escape sequences */
  381.         int c;  /* final character to be saved */
  382.         save_and_next(ls);  /* keep '\\' for error messages */
  383.         switch (ls->current) {
  384.           case 'a': c = '\a'; goto read_save;
  385.           case 'b': c = '\b'; goto read_save;
  386.           case 'f': c = '\f'; goto read_save;
  387.           case 'n': c = '\n'; goto read_save;
  388.           case 'r': c = '\r'; goto read_save;
  389.           case 't': c = '\t'; goto read_save;
  390.           case 'v': c = '\v'; goto read_save;
  391.           case 'x': c = readhexaesc(ls); goto read_save;
  392.           case 'u': utf8esc(ls);  goto no_save;
  393.           case '\n': case '\r':
  394.             inclinenumber(ls); c = '\n'; goto only_save;
  395.           case '\\': case '\"': case '\'':
  396.             c = ls->current; goto read_save;
  397.           case EOZ: goto no_save;  /* will raise an error next loop */
  398.           case 'z': {  /* zap following span of spaces */
  399.             luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
  400.             next(ls);  /* skip the 'z' */
  401.             while (lisspace(ls->current)) {
  402.               if (currIsNewline(ls)) inclinenumber(ls);
  403.               else next(ls);
  404.             }
  405.             goto no_save;
  406.           }
  407.           default: {
  408.             esccheck(ls, lisdigit(ls->current), "invalid escape sequence");
  409.             c = readdecesc(ls);  /* digital escape '\ddd' */
  410.             goto only_save;
  411.           }
  412.         }
  413.        read_save:
  414.          next(ls);
  415.          /* go through */
  416.        only_save:
  417.          luaZ_buffremove(ls->buff, 1);  /* remove '\\' */
  418.          save(ls, c);
  419.          /* go through */
  420.        no_save: break;
  421.       }
  422.       default:
  423.         save_and_next(ls);
  424.     }
  425.   }
  426.   save_and_next(ls);  /* skip delimiter */
  427.   seminfo->ts = luaX_newstring(ls, luaZ_buffer(ls->buff) + 1,
  428.                                    luaZ_bufflen(ls->buff) - 2);
  429. }
  430.  
  431.  
  432. static int llex (LexState *ls, SemInfo *seminfo) {
  433.   luaZ_resetbuffer(ls->buff);
  434.   for (;;) {
  435.     switch (ls->current) {
  436.       case '\n': case '\r': {  /* line breaks */
  437.         inclinenumber(ls);
  438.         break;
  439.       }
  440.       case ' ': case '\f': case '\t': case '\v': {  /* spaces */
  441.         next(ls);
  442.         break;
  443.       }
  444.       case '-': {  /* '-' or '--' (comment) */
  445.         next(ls);
  446.         if (ls->current != '-') return '-';
  447.         /* else is a comment */
  448.         next(ls);
  449.         if (ls->current == '[') {  /* long comment? */
  450.           size_t sep = skip_sep(ls);
  451.           luaZ_resetbuffer(ls->buff);  /* 'skip_sep' may dirty the buffer */
  452.           if (sep >= 2) {
  453.             read_long_string(ls, NULL, sep);  /* skip long comment */
  454.             luaZ_resetbuffer(ls->buff);  /* previous call may dirty the buff. */
  455.             break;
  456.           }
  457.         }
  458.         /* else short comment */
  459.         while (!currIsNewline(ls) && ls->current != EOZ)
  460.           next(ls);  /* skip until end of line (or end of file) */
  461.         break;
  462.       }
  463.       case '[': {  /* long string or simply '[' */
  464.         size_t sep = skip_sep(ls);
  465.         if (sep >= 2) {
  466.           read_long_string(ls, seminfo, sep);
  467.           return TK_STRING;
  468.         }
  469.         else if (sep == 0)  /* '[=...' missing second bracket */
  470.           lexerror(ls, "invalid long string delimiter", TK_STRING);
  471.         return '[';
  472.       }
  473.       case '=': {
  474.         next(ls);
  475.         if (check_next1(ls, '=')) return TK_EQ;
  476.         else return '=';
  477.       }
  478.       case '<': {
  479.         next(ls);
  480.         if (check_next1(ls, '=')) return TK_LE;
  481.         else if (check_next1(ls, '<')) return TK_SHL;
  482.         else return '<';
  483.       }
  484.       case '>': {
  485.         next(ls);
  486.         if (check_next1(ls, '=')) return TK_GE;
  487.         else if (check_next1(ls, '>')) return TK_SHR;
  488.         else return '>';
  489.       }
  490.       case '/': {
  491.         next(ls);
  492.         if (check_next1(ls, '/')) return TK_IDIV;
  493.         else return '/';
  494.       }
  495.       case '~': {
  496.         next(ls);
  497.         if (check_next1(ls, '=')) return TK_NE;
  498.         else return '~';
  499.       }
  500.       case ':': {
  501.         next(ls);
  502.         if (check_next1(ls, ':')) return TK_DBCOLON;
  503.         else return ':';
  504.       }
  505.       case '"': case '\'': {  /* short literal strings */
  506.         read_string(ls, ls->current, seminfo);
  507.         return TK_STRING;
  508.       }
  509.       case '.': {  /* '.', '..', '...', or number */
  510.         save_and_next(ls);
  511.         if (check_next1(ls, '.')) {
  512.           if (check_next1(ls, '.'))
  513.             return TK_DOTS;   /* '...' */
  514.           else return TK_CONCAT;   /* '..' */
  515.         }
  516.         else if (!lisdigit(ls->current)) return '.';
  517.         else return read_numeral(ls, seminfo);
  518.       }
  519.       case '0': case '1': case '2': case '3': case '4':
  520.       case '5': case '6': case '7': case '8': case '9': {
  521.         return read_numeral(ls, seminfo);
  522.       }
  523.       case EOZ: {
  524.         return TK_EOS;
  525.       }
  526.       default: {
  527.         if (lislalpha(ls->current)) {  /* identifier or reserved word? */
  528.           TString *ts;
  529.           do {
  530.             save_and_next(ls);
  531.           } while (lislalnum(ls->current));
  532.           ts = luaX_newstring(ls, luaZ_buffer(ls->buff),
  533.                                   luaZ_bufflen(ls->buff));
  534.           seminfo->ts = ts;
  535.           if (isreserved(ts))  /* reserved word? */
  536.             return ts->extra - 1 + FIRST_RESERVED;
  537.           else {
  538.             return TK_NAME;
  539.           }
  540.         }
  541.         else {  /* single-char tokens (+ - / ...) */
  542.           int c = ls->current;
  543.           next(ls);
  544.           return c;
  545.         }
  546.       }
  547.     }
  548.   }
  549. }
  550.  
  551.  
  552. void luaX_next (LexState *ls) {
  553.   ls->lastline = ls->linenumber;
  554.   if (ls->lookahead.token != TK_EOS) {  /* is there a look-ahead token? */
  555.     ls->t = ls->lookahead;  /* use this one */
  556.     ls->lookahead.token = TK_EOS;  /* and discharge it */
  557.   }
  558.   else
  559.     ls->t.token = llex(ls, &ls->t.seminfo);  /* read next token */
  560. }
  561.  
  562.  
  563. int luaX_lookahead (LexState *ls) {
  564.   lua_assert(ls->lookahead.token == TK_EOS);
  565.   ls->lookahead.token = llex(ls, &ls->lookahead.seminfo);
  566.   return ls->lookahead.token;
  567. }
  568.  
  569.