[270] | 1 | /*
|
---|
| 2 | ** parse.y - mruby parser
|
---|
| 3 | **
|
---|
| 4 | ** See Copyright Notice in mruby.h
|
---|
| 5 | */
|
---|
| 6 | using System;
|
---|
| 7 | using System.Collections.Generic;
|
---|
| 8 | using Bridge.Html5;
|
---|
| 9 |
|
---|
| 10 | namespace WebMrbc
|
---|
| 11 | {
|
---|
// Contract for objects that can evaluate a named method against node arguments
// and produce a node as the result.
interface IEvaluatable
{
	// method: name of the method to evaluate; args: argument nodes passed to it.
	node evaluate(string method, node[] args);
}
|
---|
| 16 |
|
---|
// One keyword-table entry: the keyword spelled as UTF-8 bytes plus the two
// token ids and the lexer state stored alongside it.
class kwtable
{
	public Uint8Array name;
	public MrbTokens id0;
	public MrbTokens id1;
	public mrb_lex_state_enum state;

	// Full entry: the name is encoded by the single-argument constructor,
	// then the token ids and the lexer state are recorded.
	public kwtable(string name, MrbTokens id0, MrbTokens id1, mrb_lex_state_enum state)
		: this(name)
	{
		this.id0 = id0;
		this.id1 = id1;
		this.state = state;
	}

	// Name-only entry: id0, id1 and state keep their default values.
	public kwtable(string name)
	{
		this.name = MrbParser.UTF8StringToArray(name);
	}
}
|
---|
| 37 |
|
---|
| 38 | delegate int partial_hook_t(MrbParser p);
|
---|
| 39 |
|
---|
// A lexer token: the file it belongs to, its kind, its text, and a loosely
// typed semantic value that the typed accessors below cast on read.
public class MrbToken
{
	private string m_Filename;
	private MrbTokens m_Kind;
	private string m_Token;
	private object m_Value;

	public string Filename => m_Filename;
	public MrbTokens Kind => m_Kind;
	public object Value => m_Value;

	// Typed views over the same m_Value slot; each getter casts, each setter overwrites.
	public node nd
	{
		get { return (node)m_Value; }
		set { m_Value = value; }
	}

	public mrb_sym id
	{
		get { return (mrb_sym)m_Value; }
		set { m_Value = value; }
	}

	public int num
	{
		get { return (int)m_Value; }
		set { m_Value = value; }
	}

	public stack_type stack
	{
		get { return (stack_type)m_Value; }
		set { m_Value = value; }
	}
	//public vtable vars { get { return (vtable)value; } set { this.value = value; } }

	public MrbToken(string filename)
	{
		m_Filename = filename;
	}

	// Records the kind and the text of this token.
	internal void SetToken(MrbTokens kind, string token)
	{
		m_Token = token;
		m_Kind = kind;
	}
}
|
---|
| 68 |
|
---|
| 69 | public partial class MrbParser : IMrbParser, MrbParser.yyInput, MrbParser.yyConsoleOut
|
---|
| 70 | {
|
---|
| 71 | const int MRB_PARSER_TOKBUF_MAX = 65536;
|
---|
| 72 | const int MRB_PARSER_TOKBUF_SIZE = 256;
|
---|
| 73 |
|
---|
| 74 | Uint8Array s;
|
---|
| 75 | int sp;
|
---|
// Name of the file currently being parsed, looked up in filename_table;
// yields "(null)" when current_filename_index is not below the table length.
public string filename {
	get {
		return current_filename_index < filename_table.Length
			? filename_table[current_filename_index]
			: "(null)";
	}
}
|
---|
| 84 | public int lineno { get; set; }
|
---|
| 85 | public int column { get; set; }
|
---|
| 86 |
|
---|
| 87 | mrb_lex_state_enum lstate;
|
---|
| 88 | node lex_strterm;
|
---|
| 89 |
|
---|
| 90 | stack_type cond_stack;
|
---|
| 91 | stack_type cmdarg_stack;
|
---|
| 92 | int paren_nest;
|
---|
| 93 | int lpar_beg;
|
---|
| 94 | int in_def, in_single;
|
---|
| 95 | bool cmd_start;
|
---|
| 96 | locals_node locals;
|
---|
| 97 |
|
---|
| 98 | node pb;
|
---|
| 99 | Uint8Array buf = new Uint8Array(MRB_PARSER_TOKBUF_SIZE);
|
---|
| 100 | Uint8Array tokbuf;
|
---|
| 101 | int tidx;
|
---|
| 102 | int tsiz;
|
---|
| 103 |
|
---|
| 104 | node all_heredocs;
|
---|
| 105 | node heredocs_from_nextline;
|
---|
| 106 | node parsing_heredoc;
|
---|
| 107 | node lex_strterm_before_heredoc;
|
---|
| 108 |
|
---|
| 109 | internal node tree;
|
---|
| 110 |
|
---|
| 111 | string[] filename_table = new string[0];
|
---|
| 112 | int filename_table_length { get { return filename_table.Length; } }
|
---|
| 113 | public int current_filename_index;
|
---|
| 114 |
|
---|
| 115 | internal partial_hook_t partial_hook;
|
---|
| 116 | internal object partial_data;
|
---|
| 117 |
|
---|
| 118 | MrbToken yylval;
|
---|
| 119 |
|
---|
// Creates a parser and points yyConsole at this instance.
public MrbParser()
{
	yyConsole = this;
}
|
---|
| 124 |
|
---|
// Throws a plain Exception when cond is false; does nothing otherwise.
void mrb_assert(bool cond)
{
	if (cond)
		return;
	throw new Exception();
}
|
---|
| 129 |
|
---|
| 130 | const int ERANGE = 1;
|
---|
| 131 | int errno;
|
---|
// Compares len bytes of a (starting at aofs) with len bytes of b (starting at bofs).
// Returns 0 when all len bytes match, otherwise the difference a - b of the
// first mismatching pair.
// Fix: the offsets are now applied; the previous version always compared from
// index 0 of both arrays regardless of aofs/bofs.
static int memcmp(Uint8Array a, int aofs, Uint8Array b, int bofs, int len)
{
	for (int i = 0; i < len; i++) {
		int diff = a[aofs + i] - b[bofs + i];
		if (diff != 0)
			return diff;
	}
	return 0;
}
|
---|
| 142 |
|
---|
// True when c is an ASCII decimal digit ('0'..'9').
static bool isdigit(int c)
{
	if (c < '0')
		return false;
	return c <= '9';
}
|
---|
| 147 |
|
---|
// True when c is an ASCII digit or an ASCII letter of either case.
static bool isalnum(int c)
{
	if (c >= '0' && c <= '9')
		return true;
	if (c >= 'A' && c <= 'Z')
		return true;
	return c >= 'a' && c <= 'z';
}
|
---|
| 152 |
|
---|
// Maps ASCII 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static int tolower(int c)
{
	bool isUpper = c >= 'A' && c <= 'Z';
	return isUpper ? c + ('a' - 'A') : c;
}
|
---|
| 159 |
|
---|
// Counts the bytes from a[ofs] up to (not including) the first zero byte,
// or up to the end of the array when no zero byte follows.
static int strlen(Uint8Array a, int ofs)
{
	int end = ofs;
	while (end < a.Length && a[end] != 0)
		end++;
	return end - ofs;
}
|
---|
| 169 |
|
---|
// Compares at most len bytes of the NUL-terminated byte strings a (from aofs)
// and b (from bofs). Returns 0 when they are equal over the compared range,
// a negative value when a sorts before b, and a positive value otherwise.
// Fix: a string that ends first now compares as smaller, as in C's strncmp;
// the previous version returned +255 when a ended first and -255 when b
// ended first, i.e. the opposite sign.
static int strncmp(Uint8Array a, int aofs, Uint8Array b, int bofs, int len)
{
	for (int i = 0; i < len; i++) {
		int ca = a[aofs + i];
		int cb = b[bofs + i];
		if (ca != cb)
			return ca - cb;
		// Both bytes are equal; a shared terminator ends the comparison.
		if (ca == 0)
			return 0;
	}
	return 0;
}
|
---|
| 190 |
|
---|
// Returns the index of the first byte equal to c at or after ofs.
// When no such byte exists the scan runs off the end and the returned index
// is not a match position (s.Length, or ofs itself if ofs is already past the end).
// Note that the scan does not stop at a zero byte.
static int strchr(Uint8Array s, int ofs, int c)
{
	int pos = ofs;
	while (pos < s.Length && s[pos] != c)
		pos++;
	return pos;
}
|
---|
| 200 |
|
---|
// Returns the len bytes of s starting at ofs, plus the one byte that follows
// them (the terminator slot).
// Fix: SubArray takes (begin, end) indices, so the end must be ofs + len + 1;
// the previous end of len + 1 was only right for ofs == 0.
// NOTE(review): SubArray presumably returns a view over the same buffer, not
// a copy -- confirm that callers do not expect an independent duplicate.
public static Uint8Array strndup(Uint8Array s, int ofs, int len)
{
	return s.SubArray(ofs, ofs + len + 1);
}
|
---|
| 205 |
|
---|
// Returns the NUL-terminated byte string that starts at s[ofs], including
// the byte after its last character (the terminator slot).
// Fix: SubArray takes (begin, end) indices, so the end must be offset by ofs;
// the previous end of strlen + 1 was only right for ofs == 0.
// NOTE(review): SubArray presumably returns a view over the same buffer, not
// a copy -- confirm that callers do not expect an independent duplicate.
public static Uint8Array strdup(Uint8Array s, int ofs)
{
	int len = strlen(s, ofs);
	return s.SubArray(ofs, ofs + len + 1);
}
|
---|
| 210 |
|
---|
// Returns the backslash-escape spelling of a single byte: a mnemonic escape
// for the bytes listed below, otherwise "\x" followed by two uppercase hex digits.
// Fixes:
//  - ESC is byte 0x1B; the previous case label '\x27' matched the apostrophe,
//    so ESC never produced "\e" and an apostrophe did.
//  - The hex fallback is padded to two digits; a single digit ("\x1") would
//    merge with a following hex-digit character when read back.
// NOTE(review): the backslash case returns a single backslash (not a doubled
// one); left as is -- confirm the intent with callers.
private static string escape(byte c)
{
	switch ((char)c) {
	case '\\': return "\\";
	case '\n': return "\\n";
	case '\t': return "\\t";
	case '\r': return "\\r";
	case '\f': return "\\f";
	case '\v': return "\\v";
	case '\a': return "\\a";
	case '\u001B': return "\\e";
	case '\b': return "\\b";
	case ' ': return "\\s";
	case '\0': return "\\0";
	default: return $"\\x{c:X2}";
	}
}
|
---|
| 228 |
|
---|
// Decodes data from idx via UTF8ArrayToStringEsc and discards the
// "something was escaped" flag.
internal static string UTF8ArrayToString(Uint8Array data, int idx)
{
	bool escaped;
	string text = UTF8ArrayToStringEsc(data, idx, out escaped);
	return text;
}
|
---|
| 234 |
|
---|
// Decodes the UTF-8 bytes of data, starting at idx, into a string.
// Printable ASCII (0x20..0x7E) is copied through; other single bytes and the
// bytes of malformed or truncated multi-byte sequences are emitted through
// escape(). One trailing zero byte at the very end of data is ignored.
//   data: source bytes.
//   idx:  index of the first byte to decode.
//   esc:  set to true when at least one byte went through escape().
// Fix: when a multi-byte sequence is interrupted by a byte that is not a
// continuation byte, that byte is now re-examined as the start of a new
// character; previously it was dropped from the output.
internal static string UTF8ArrayToStringEsc(Uint8Array data, int idx, out bool esc)
{
	var str = "";
	// c: number of bytes buffered in temp; t: expected length of the open
	// sequence (0 while no multi-byte sequence is open).
	int c = 0, t = 0, end = data.Length;
	var temp = new byte[6];

	esc = false;
	if (end > 0 && data[end - 1] == '\x0')
		end--;

	for (int i = idx; i < end; i++) {
		var d = data[i];
		if (t == 0) {
			// Single-byte code
			if ((d & 0x80) == 0) {
				// Printable: copy through
				if (d >= 0x20 && d < 0x7F)
					str += (char)d;
				// Not printable: escape
				else {
					esc = true;
					str += escape(d);
				}
				continue;
			}
			// Lead byte of a 2-byte code
			else if ((d & 0xE0) == 0xC0) {
				t = 2;
			}
			// Lead byte of a 3-byte code
			else if ((d & 0xF0) == 0xE0) {
				t = 3;
			}
			// Lead byte of a 4-byte code
			else if ((d & 0xF8) == 0xF0) {
				t = 4;
			}
			// Lead byte of a 5-byte code
			else if ((d & 0xFC) == 0xF8) {
				t = 5;
			}
			// Lead byte of a 6-byte code
			else if ((d & 0xFE) == 0xFC) {
				t = 6;
			}
			// Neither ASCII nor a valid lead byte: escape
			else {
				esc = true;
				str += escape(d);
				continue;
			}
			temp[0] = d;
			c = 1;
		}
		else {
			// The open sequence is broken by a non-continuation byte:
			// escape what was buffered, then look at this byte again
			// as the start of a new character.
			if ((d & 0xC0) != 0x80) {
				esc = true;
				for (int j = 0; j < c; j++) {
					str += escape(temp[j]);
				}
				t = 0;
				c = 0;
				i--;
				continue;
			}
			temp[c] = d;
			c++;
			// Sequence complete: decode it
			if (c == t) {
				switch (t) {
				case 2:
					str += ConvertFromUtf32(((temp[0] & 0x1F) << 6) | (temp[1] & 0x3F));
					break;
				case 3:
					str += ConvertFromUtf32(((temp[0] & 0x0F) << 12) | ((temp[1] & 0x3F) << 6) | (temp[2] & 0x3F));
					break;
				case 4:
					str += ConvertFromUtf32(((temp[0] & 0x07) << 18) | ((temp[1] & 0x3F) << 12) | ((temp[2] & 0x3F) << 6) | (temp[3] & 0x3F));
					break;
				case 5:
					str += ConvertFromUtf32(((temp[0] & 0x03) << 24) | ((temp[1] & 0x3F) << 18) | ((temp[2] & 0x3F) << 12) | ((temp[3] & 0x3F) << 6) | (temp[4] & 0x3F));
					break;
				case 6:
					str += ConvertFromUtf32(((temp[0] & 0x01) << 30) | ((temp[1] & 0x3F) << 24) | ((temp[2] & 0x3F) << 18) | ((temp[3] & 0x3F) << 12) | ((temp[4] & 0x3F) << 6) | (temp[5] & 0x3F));
					break;
				}
				t = 0;
				c = 0;
				continue;
			}
		}
	}

	// A sequence still open at the end of the data is truncated: escape its bytes.
	if (c > 0)
		esc = true;
	for (int i = 0; i < c; i++) {
		str += escape(temp[i]);
	}

	return str;
}
|
---|
| 335 |
|
---|
// from Emscripten (http://kripken.github.io/emscripten-site/)
// String.FromCharCode builds text from UTF-16 code units, not from Unicode code
// points, so values of 0x10000 and above are split into a surrogate pair first.
// See http://unicode.org/faq/utf_bom.html#utf16-3
public static string ConvertFromUtf32(int utf32)
{
	if (utf32 < 0x10000)
		return "" + String.FromCharCode(utf32);

	var offset = utf32 - 0x10000;
	var lead = 0xD800 | (offset >> 10);
	var trail = 0xDC00 | (offset & 0x3FF);
	return "" + String.FromCharCode(lead, trail);
}
|
---|
| 351 |
|
---|
// from Emscripten (http://kripken.github.io/emscripten-site/)
// Encodes str as UTF-8 into outU8Array starting at outIdx and NUL-terminates it.
// Parameters:
//   str: the string to copy.
//   outU8Array: the array to copy to. Each element holds one 8-bit byte.
//   outIdx: the starting offset in the array to begin the copying.
//   maxBytesToWrite: the maximum number of bytes this function may write, INCLUDING the
//                    NUL terminator; with 1 only the terminator is written, with 0 or a
//                    negative value nothing is written at all.
// Returns the number of bytes written, EXCLUDING the NUL terminator. Encoding stops
// before the first character that would not fit completely.
// Fixes:
//  - The code point is held in an int. It used to be a char, and the surrogate-pair
//    result was cast back to char, which cut every code point above 0xFFFF to 16 bits.
//  - A surrogate that is the last code unit of str no longer reads past the end of
//    the string; the missing partner contributes zero bits.
//  - The 5- and 6-byte branches are removed: a value built from UTF-16 code units
//    never exceeds 0x10FFFF, so they could not be reached.
internal static int stringToUTF8Array(string str, Uint8Array outU8Array, int outIdx, int maxBytesToWrite)
{
	if (!(maxBytesToWrite > 0))
		return 0;

	var startIdx = outIdx;
	var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
	for (var i = 0; i < str.Length; ++i) {
		// str[i] is a UTF-16 code unit, not a code point: decode UTF16->UTF32->UTF8.
		// See http://unicode.org/faq/utf_bom.html#utf16-3
		// For the UTF-8 byte layout see https://tools.ietf.org/html/rfc3629
		int u = str[i]; // possibly a lead surrogate
		if (u >= 0xD800 && u <= 0xDFFF) {
			int trail = (i + 1 < str.Length) ? str[i + 1] : 0;
			++i;
			u = (0x10000 + ((u & 0x3FF) << 10)) | (trail & 0x3FF);
		}
		if (u <= 0x7F) {
			if (outIdx >= endIdx) break;
			outU8Array[outIdx++] = (byte)u;
		}
		else if (u <= 0x7FF) {
			if (outIdx + 1 >= endIdx) break;
			outU8Array[outIdx++] = (byte)(0xC0 | (u >> 6));
			outU8Array[outIdx++] = (byte)(0x80 | (u & 63));
		}
		else if (u <= 0xFFFF) {
			if (outIdx + 2 >= endIdx) break;
			outU8Array[outIdx++] = (byte)(0xE0 | (u >> 12));
			outU8Array[outIdx++] = (byte)(0x80 | ((u >> 6) & 63));
			outU8Array[outIdx++] = (byte)(0x80 | (u & 63));
		}
		else {
			if (outIdx + 3 >= endIdx) break;
			outU8Array[outIdx++] = (byte)(0xF0 | (u >> 18));
			outU8Array[outIdx++] = (byte)(0x80 | ((u >> 12) & 63));
			outU8Array[outIdx++] = (byte)(0x80 | ((u >> 6) & 63));
			outU8Array[outIdx++] = (byte)(0x80 | (u & 63));
		}
	}
	// Null-terminate the encoded bytes.
	outU8Array[outIdx] = 0;
	return outIdx - startIdx;
}
|
---|
| 421 |
|
---|
// from Emscripten (http://kripken.github.io/emscripten-site/)
// Returns the number of bytes str occupies when encoded as UTF-8, EXCLUDING a
// NUL terminator.
// Fixes:
//  - The code point is held in an int. It used to be a char, and the surrogate-pair
//    result was cast back to char, so characters above 0xFFFF were counted as at
//    most 3 bytes instead of 4.
//  - A surrogate that is the last code unit of str no longer reads past the end of
//    the string; the missing partner contributes zero bits.
//  - The 5- and 6-byte branches are removed: a value built from UTF-16 code units
//    never exceeds 0x10FFFF, so they could not be reached.
internal static int lengthBytesUTF8(string str)
{
	var len = 0;
	for (var i = 0; i < str.Length; ++i) {
		// str[i] is a UTF-16 code unit, not a code point: decode UTF16->UTF32 first.
		// See http://unicode.org/faq/utf_bom.html#utf16-3
		int u = str[i]; // possibly a lead surrogate
		if (u >= 0xD800 && u <= 0xDFFF) {
			int trail = (i + 1 < str.Length) ? str[i + 1] : 0;
			++i;
			u = (0x10000 + ((u & 0x3FF) << 10)) | (trail & 0x3FF);
		}
		if (u <= 0x7F) {
			++len;
		}
		else if (u <= 0x7FF) {
			len += 2;
		}
		else if (u <= 0xFFFF) {
			len += 3;
		}
		else {
			len += 4;
		}
	}
	return len;
}
|
---|
| 453 |
|
---|
// Encodes str as UTF-8 into a freshly allocated array sized for the encoded
// bytes plus one terminating zero byte.
internal static Uint8Array UTF8StringToArray(string str)
{
	var capacity = lengthBytesUTF8(str) + 1;
	var bytes = new Uint8Array(capacity);
	stringToUTF8Array(str, bytes, 0, capacity);
	return bytes;
}
|
---|
| 461 |
|
---|
// Parses the text from s[ofs] to the end of s as an unsigned decimal integer.
//   _base_: only 10 is supported; any other value throws AggregateException.
//   endptr: null when the whole text parsed, otherwise s itself.
// Returns the parsed value, or whatever UInt64.TryParse leaves in its out
// argument when parsing fails.
// Fix: SubArray takes (begin, end) indices, so the slice must end at s.Length;
// the previous end of s.Length - ofs + 1 cut off the tail whenever ofs > 0.
static ulong strtoul(Uint8Array s, int ofs, out Uint8Array endptr, int _base_)
{
	if (_base_ != 10) throw new AggregateException();
	ulong result;
	string text = UTF8ArrayToString(s.SubArray(ofs, s.Length), 0);
	if (UInt64.TryParse(text, out result)) {
		endptr = null;
	}
	else {
		endptr = s;
	}
	return result;
}
|
---|
| 474 |
|
---|
// Parses the text from s[ofs] to the end of s as a double.
//   endptr: null when the whole text parsed, otherwise s itself.
// Returns the parsed value, or whatever Double.TryParse leaves in its out
// argument when parsing fails.
// Fix: SubArray takes (begin, end) indices, so the slice must end at s.Length;
// the previous end of s.Length - ofs + 1 cut off the tail whenever ofs > 0.
static double mrb_float_read(Uint8Array s, int ofs, out Uint8Array endptr)
{
	double result;
	string text = UTF8ArrayToString(s.SubArray(ofs, s.Length), 0);
	if (Double.TryParse(text, out result)) {
		endptr = null;
	}
	else {
		endptr = s;
	}
	return result;
}
|
---|
| 486 |
|
---|
// True when c passes ISALNUM, is an underscore, or fails ISASCII.
bool identchar(int c)
{
	if (ISALNUM(c))
		return true;
	if (c == '_')
		return true;
	return !ISASCII(c);
}
|
---|
| 488 |
|
---|
// Bit-stack helpers: a stack_type value is used as a stack of single bits
// whose top is the least significant bit.

// Shifts the stack left by one and stores the low bit of n as the new top.
void BITSTACK_PUSH(ref stack_type stack, uint n)
{
	uint bits = (uint)stack;
	stack = (stack_type)((bits << 1) | (n & 1));
}

// Drops the top bit.
void BITSTACK_POP(ref stack_type stack)
{
	uint bits = (uint)stack;
	stack = (stack_type)(bits >> 1);
}

// Drops the top bit but ORs it into the bit that becomes the new top.
void BITSTACK_LEXPOP(ref stack_type stack)
{
	uint bits = (uint)stack;
	stack = (stack_type)((bits >> 1) | (bits & 1));
}

// Returns the top bit (0 or 1) without changing the stack.
stack_type BITSTACK_SET_P(ref stack_type stack)
{
	uint bits = (uint)stack;
	return (stack_type)(bits & 1);
}

// The same four operations applied to cond_stack.
void COND_PUSH(uint n) => BITSTACK_PUSH(ref cond_stack, n);
void COND_POP() => BITSTACK_POP(ref cond_stack);
void COND_LEXPOP() => BITSTACK_LEXPOP(ref cond_stack);
stack_type COND_P() => BITSTACK_SET_P(ref cond_stack);

// The same four operations applied to cmdarg_stack.
void CMDARG_PUSH(uint n) => BITSTACK_PUSH(ref cmdarg_stack, n);
void CMDARG_POP() => BITSTACK_POP(ref cmdarg_stack);
void CMDARG_LEXPOP() => BITSTACK_LEXPOP(ref cmdarg_stack);
stack_type CMDARG_P() => BITSTACK_SET_P(ref cmdarg_stack);
|
---|
| 503 |
|
---|
| 504 | string[] syms = new string[0];
|
---|
| 505 |
|
---|
// Returns the symbol id for str, registering str in syms on first use.
// Ids are 1-based: the id is the position of str in syms plus one.
private mrb_sym get_sym(string str)
{
	int index = syms.IndexOf(str);
	if (index >= 0)
		return (mrb_sym)(index + 1);

	// Not registered yet: the new entry lands at the current end of the table.
	index = syms.Length;
	syms.Push(str);
	return (mrb_sym)(index + 1);
}
|
---|
| 515 |
|
---|
// Returns the name registered for sym, or the numeric value of sym as text
// when it does not correspond to an entry of syms.
public string sym2name(mrb_sym sym)
{
	int index = (int)sym - 1;
	bool known = index >= 0 && index < syms.Length;
	return known ? syms[index] : ((int)sym).ToString();
}
|
---|
| 523 |
|
---|
// Interns the text decoded from the first len + 1 bytes of s
// (UTF8ArrayToString ignores one trailing zero byte).
mrb_sym mrb_intern(Uint8Array s, int len)
{
	var bytes = s.SubArray(0, len + 1);
	return get_sym(UTF8ArrayToString(bytes, 0));
}
|
---|
| 529 |
|
---|
// Interns the NUL-terminated byte string at the start of s.
mrb_sym intern_cstr(Uint8Array s)
{
	var bytes = s.SubArray(0, strlen(s, 0) + 1);
	return get_sym(UTF8ArrayToString(bytes, 0));
}
|
---|
| 535 |
|
---|
// Interns the first len characters of s.
mrb_sym intern(string s, int len)
{
	return get_sym(s.Substring(0, len));
}
|
---|
| 541 |
|
---|
// Interns the one-character string made of c.
mrb_sym intern_c(char c)
{
	return get_sym(c.ToString());
}
|
---|
| 547 |
|
---|
// Builds a cons cell (car . cdr) through node.cons, passing this parser along.
public node cons(object car, object cdr) => node.cons(this, car, cdr);
|
---|
| 552 |
|
---|
// list1..list5 build a null-terminated cons list of their arguments.
// Each longer list is the first element consed onto the next shorter list,
// so the cells are still created from the tail towards the head.
public node list1(object a) => cons(a, null);

public node list2(object a, object b) => cons(a, list1(b));

public node list3(object a, object b, object c) => cons(a, list2(b, c));

public node list4(object a, object b, object c, object d) => cons(a, list3(b, c, d));

public node list5(object a, object b, object c, object d, object e) => cons(a, list4(b, c, d, e));
|
---|
| 577 |
|
---|
// Appends b to a via a.append and returns a; when a is null, b is returned as is.
node append(node a, node b)
{
	if (a != null) {
		a.append(b);
		return a;
	}
	return b;
}
|
---|
| 584 |
|
---|
// Appends the single element b, wrapped in a one-element list, to a.
node push(node a, object b) => append(a, list1(b));
|
---|
| 589 |
|
---|
| 590 | /* xxx ----------------------------- */
|
---|
| 591 |
|
---|
// Replaces the current locals with a fresh locals_node that has no parent
// and returns the previous one so it can be restored with local_resume.
locals_node local_switch()
{
	var saved = this.locals;
	this.locals = new locals_node(null);
	return saved;
}

// Restores a locals chain previously returned by local_switch.
void local_resume(locals_node prev)
{
	this.locals = prev;
}

// Pushes a new locals_node whose parent is the current one.
void local_nest()
{
	var parent = this.locals;
	this.locals = new locals_node(parent);
}

// Pops the innermost locals_node; does nothing when there is none.
void local_unnest()
{
	if (this.locals == null)
		return;
	this.locals = this.locals.cdr;
}
|
---|
| 615 |
|
---|
// True when sym is listed in the current locals_node or in any node reached
// by following cdr from it.
bool local_var_p(mrb_sym sym)
{
	for (var scope = this.locals; scope != null; scope = scope.cdr) {
		if (scope.symList.Contains(sym))
			return true;
	}
	return false;
}
|
---|
| 627 |
|
---|
// Adds sym to the innermost locals_node without checking for duplicates;
// does nothing when there is no current locals_node.
void local_add_f(mrb_sym sym)
{
	if (this.locals == null)
		return;
	this.locals.push(sym);
}

// Adds sym unless local_var_p already finds it.
void local_add(mrb_sym sym)
{
	if (local_var_p(sym))
		return;
	local_add_f(sym);
}

// Symbol list of the innermost locals_node, or null when there is none.
public mrb_sym[] locals_node()
{
	if (this.locals == null)
		return null;
	return this.locals.symList;
}
|
---|
| 646 |
|
---|
/* (:scope (vars..) (prog...)) */
scope_node new_scope(node body) => new scope_node(this, body);

/* (:begin prog...) */
begin_node new_begin(node body) => new begin_node(this, body);

/* returns n unchanged */
node newline_node(node n) => n;

/* (:rescue body rescue else) */
rescue_node new_rescue(node body, node resq, node els) => new rescue_node(this, body, resq, els);

/* rescue with a single clause (null null resq) and no else part */
rescue_node new_mod_rescue(node body, node resq)
{
	var clause = list3(null, null, resq);
	return new_rescue(body, list1(clause), null);
}

/* (:ensure body ensure) */
ensure_node new_ensure(node a, node b) => new ensure_node(this, a, b);

/* (:nil) */
nil_node new_nil() => new nil_node(this);

/* (:true) */
true_node new_true() => new true_node(this);

/* (:false) */
false_node new_false() => new false_node(this);
|
---|
| 698 |
|
---|
/* (:alias new old) */
alias_node new_alias(mrb_sym a, mrb_sym b) => new alias_node(this, a, b);

/* (:if cond then else) */
if_node new_if(node a, node b, node c, bool inline = false) => new if_node(this, a, b, c, inline);

/* (:unless cond then else) */
unless_node new_unless(node a, node b, node c) => new unless_node(this, a, b, c);

/* (:while cond body) */
while_node new_while(node a, node b) => new while_node(this, a, b);

/* (:until cond body) */
until_node new_until(node a, node b) => new until_node(this, a, b);

/* (:for var obj body) */
for_node new_for(node v, node o, node b) => new for_node(this, v, o, b);

/* (:case a ((when ...) body) ((when...) body)) */
case_node new_case(node a, node b) => new case_node(this, a, b);

/* (:postexe a) */
postexe_node new_postexe(node a) => new postexe_node(this, a);

/* (:self) */
internal self_node new_self() => new self_node(this);
|
---|
| 752 |
|
---|
/* (:call a b c) */
call_node new_call(node a, mrb_sym b, node c, MrbTokens pass) => new call_node(this, a, b, c, pass);

/* (:fcall self mid args) */
fcall_node new_fcall(mrb_sym b, node c) => new fcall_node(this, b, c);

/* (:super . c) */
super_node new_super(node c) => new super_node(this, c);

/* (:zsuper) */
zsuper_node new_zsuper() => new zsuper_node(this);

/* (:yield . c) */
yield_node new_yield(node c) => new yield_node(this, c);

/* (:return . c) */
return_node new_return(node c) => new return_node(this, c);

/* (:break . c) */
break_node new_break(node c) => new break_node(this, c);

/* (:next . c) */
next_node new_next(node c) => new next_node(this, c);

/* (:redo) */
redo_node new_redo() => new redo_node(this);

/* (:retry) */
retry_node new_retry() => new retry_node(this);
|
---|
| 812 |
|
---|
/* (:dot2 a b) */
dot2_node new_dot2(node a, node b) => new dot2_node(this, a, b);

/* (:dot3 a b) */
dot3_node new_dot3(node a, node b) => new dot3_node(this, a, b);

/* (:colon2 b c) */
colon2_node new_colon2(node b, mrb_sym c) => new colon2_node(this, b, c);

/* (:colon3 . c) */
colon3_node new_colon3(mrb_sym c) => new colon3_node(this, c);

/* (:and a b) */
and_node new_and(node a, node b) => new and_node(this, a, b);

/* (:or a b) */
or_node new_or(node a, node b) => new or_node(this, a, b);

/* (:array a...) */
array_node new_array(node a) => new array_node(this, a);

/* (:splat . a) */
splat_node new_splat(node a) => new splat_node(this, a);

/* (:hash (k . v) (k . v)...) */
hash_node new_hash(node a) => new hash_node(this, a);

/* (:sym . a) */
sym_node new_sym(mrb_sym sym) => new sym_node(this, sym);
|
---|
| 872 |
|
---|
// Interns the bytes carried by a string node. A str_node supplies them through
// its str/len members; any other node is read as (x . (bytes . length)).
mrb_sym new_strsym(node str)
{
	if (str is str_node) {
		var literal = (str_node)str;
		return mrb_intern(literal.str, literal.len);
	}

	var payload = (node)str.cdr;
	var bytes = (Uint8Array)payload.car;
	var length = (int)payload.cdr;
	return mrb_intern(bytes, length);
}
|
---|
| 889 |
|
---|
| 890 | /* (:lvar . a) */
|
---|
/* Builds an lvar_node owned by this parser for the symbol sym. */
lvar_node new_lvar(mrb_sym sym)
{
    var created = new lvar_node(this, sym);
    return created;
}
|
---|
| 895 |
|
---|
| 896 | /* (:gvar . a) */
|
---|
/* Builds a gvar_node owned by this parser for the symbol sym. */
gvar_node new_gvar(mrb_sym sym)
{
    var created = new gvar_node(this, sym);
    return created;
}
|
---|
| 901 |
|
---|
| 902 | /* (:ivar . a) */
|
---|
/* Builds an ivar_node owned by this parser for the symbol sym. */
ivar_node new_ivar(mrb_sym sym)
{
    var created = new ivar_node(this, sym);
    return created;
}
|
---|
| 907 |
|
---|
| 908 | /* (:cvar . a) */
|
---|
/* Builds a cvar_node owned by this parser for the symbol sym. */
cvar_node new_cvar(mrb_sym sym)
{
    var created = new cvar_node(this, sym);
    return created;
}
|
---|
| 913 |
|
---|
| 914 | /* (:const . a) */
|
---|
/* Builds a const_node owned by this parser for the symbol sym. */
const_node new_const(mrb_sym sym)
{
    var created = new const_node(this, sym);
    return created;
}
|
---|
| 919 |
|
---|
| 920 | /* (:undef a...) */
|
---|
/* Builds an undef_node owned by this parser for the single symbol sym. */
undef_node new_undef(mrb_sym sym)
{
    var created = new undef_node(this, sym);
    return created;
}
|
---|
| 925 |
|
---|
| 926 | /* (:class class super body) */
|
---|
/* Builds a class_node owned by this parser from c, s and b (class, super, body per the note above). */
class_node new_class(node c, node s, node b)
{
    var created = new class_node(this, c, s, b);
    return created;
}
|
---|
| 931 |
|
---|
| 932 | /* (:sclass obj body) */
|
---|
/* Builds an sclass_node owned by this parser from the object o and the body b. */
sclass_node new_sclass(node o, node b)
{
    var created = new sclass_node(this, o, b);
    return created;
}
|
---|
| 937 |
|
---|
| 938 | /* (:module module body) */
|
---|
/* Builds a module_node owned by this parser from the module m and the body b. */
module_node new_module(node m, node b)
{
    var created = new module_node(this, m, b);
    return created;
}
|
---|
| 943 |
|
---|
| 944 | /* (:def m lv (arg . body)) */
|
---|
/* Builds a def_node owned by this parser: m is the method symbol, a and b are forwarded as given. */
def_node new_def(mrb_sym m, node a, node b)
{
    var created = new def_node(this, m, a, b);
    return created;
}
|
---|
| 949 |
|
---|
| 950 | /* (:sdef obj m lv (arg . body)) */
|
---|
/* Builds an sdef_node owned by this parser: o is the object, m the method symbol; a and b are forwarded. */
sdef_node new_sdef(node o, mrb_sym m, node a, node b)
{
    var created = new sdef_node(this, o, m, a, b);
    return created;
}
|
---|
| 955 |
|
---|
| 956 | /* (:arg . sym) */
|
---|
/* Builds an arg_node owned by this parser for the symbol sym. */
arg_node new_arg(mrb_sym sym)
{
    var created = new arg_node(this, sym);
    return created;
}
|
---|
| 961 |
|
---|
| 962 | /* (m o r m2 b) */
|
---|
| 963 | /* m: (a b c) */
|
---|
| 964 | /* o: ((a . e1) (b . e2)) */
|
---|
| 965 | /* r: a */
|
---|
| 966 | /* m2: (a b c) */
|
---|
| 967 | /* b: a */
|
---|
/*
 * Chains the five argument groups into nested cons cells, built from the
 * tail forward: (m . (opt . (rest . (m2 . blk)))).
 */
node new_args(node m, node opt, mrb_sym rest, node m2, mrb_sym blk)
{
    node tail = cons(m2, blk);
    tail = cons(rest, tail);
    tail = cons(opt, tail);

    return cons(m, tail);
}
|
---|
| 977 |
|
---|
| 978 | /* (:block_arg . a) */
|
---|
/* Builds a block_arg_node owned by this parser around a. */
block_arg_node new_block_arg(node a)
{
    var created = new block_arg_node(this, a);
    return created;
}
|
---|
| 983 |
|
---|
| 984 | /* (:block arg body) */
|
---|
/* Builds a block_node owned by this parser; a, b and the brace flag are forwarded untouched. */
block_node new_block(node a, node b, bool brace)
{
    var created = new block_node(this, a, b, brace);
    return created;
}
|
---|
| 989 |
|
---|
| 990 | /* (:lambda arg body) */
|
---|
/* Builds a lambda_node owned by this parser from a and b. */
lambda_node new_lambda(node a, node b)
{
    var created = new lambda_node(this, a, b);
    return created;
}
|
---|
| 995 |
|
---|
| 996 | /* (:asgn lhs rhs) */
|
---|
/* Builds an asgn_node owned by this parser with a as target and b as value. */
asgn_node new_asgn(node a, node b)
{
    var created = new asgn_node(this, a, b);
    return created;
}
|
---|
| 1001 |
|
---|
| 1002 | /* (:masgn mlhs=(pre rest post) mrhs) */
|
---|
/* Builds a masgn_node owned by this parser from the target list a and the value b. */
masgn_node new_masgn(node a, node b)
{
    var created = new masgn_node(this, a, b);
    return created;
}
|
---|
| 1007 |
|
---|
| 1008 | /* (:op_asgn lhs op rhs) */
|
---|
/* Builds an op_asgn_node owned by this parser: target a, operator symbol op, operand b. */
op_asgn_node new_op_asgn(node a, mrb_sym op, node b)
{
    var created = new op_asgn_node(this, a, op, b);
    return created;
}
|
---|
| 1013 |
|
---|
| 1014 | /* (:int . i) */
|
---|
/* Builds an int_node owned by this parser from the digit bytes s and the radix _base. */
int_node new_int(Uint8Array s, int _base)
{
    var created = new int_node(this, s, _base);
    return created;
}
|
---|
| 1019 |
|
---|
/* String convenience overload: encodes s with MrbParser.UTF8StringToArray, then defers to the byte-array overload. */
int_node new_int(string s, int _base)
{
    var digits = MrbParser.UTF8StringToArray(s);
    return new_int(digits, _base);
}
|
---|
| 1021 |
|
---|
| 1022 | /* (:float . i) */
|
---|
/* Builds a float_node owned by this parser from the literal bytes s. */
float_node new_float(Uint8Array s)
{
    var created = new float_node(this, s);
    return created;
}
|
---|
| 1027 |
|
---|
| 1028 | /* (:str . (s . len)) */
|
---|
/* Builds a str_node owned by this parser from the byte buffer s and the length len. */
str_node new_str(Uint8Array s, int len)
{
    var created = new str_node(this, s, len);
    return created;
}
|
---|
| 1033 |
|
---|
/*
 * String convenience overload: encodes s with MrbParser.UTF8StringToArray and
 * defers to the byte-array overload. len is passed through as supplied; it is
 * not recomputed from the encoded bytes.
 */
str_node new_str(string s, int len)
{
    var encoded = MrbParser.UTF8StringToArray(s);
    return new_str(encoded, len);
}
|
---|
| 1035 |
|
---|
| 1036 | /* (:dstr . a) */
|
---|
/* Builds a dstr_node owned by this parser around the part list a. */
internal dstr_node new_dstr(node a)
{
    var created = new dstr_node(this, a);
    return created;
}
|
---|
| 1041 |
|
---|
| 1042 | /* (:xstr . (s . len)) */
|
---|
/* Builds an xstr_node owned by this parser from the byte buffer s and the length len. */
xstr_node new_xstr(Uint8Array s, int len)
{
    var created = new xstr_node(this, s, len);
    return created;
}
|
---|
| 1047 |
|
---|
| 1048 | /* (:dxstr . a) */
|
---|
/* Builds a dxstr_node owned by this parser around the part list a. */
dxstr_node new_dxstr(node a)
{
    var created = new dxstr_node(this, a);
    return created;
}
|
---|
| 1053 |
|
---|
| 1054 | /* (:dsym . a) */
|
---|
/* Builds a dsym_node owned by this parser around a. */
dsym_node new_dsym(node a)
{
    var created = new dsym_node(this, a);
    return created;
}
|
---|
| 1059 |
|
---|
| 1060 | /* (:regx . (p1 p2 p3)) */
|
---|
/* Builds a regx_node owned by this parser from the three byte buffers p1, p2 and p3. */
regx_node new_regx(Uint8Array p1, Uint8Array p2, Uint8Array p3)
{
    var created = new regx_node(this, p1, p2, p3);
    return created;
}
|
---|
| 1065 |
|
---|
| 1066 | /* (:dregx . a) */
|
---|
/* Builds a dregx_node owned by this parser from a and b. */
dregx_node new_dregx(node a, node b)
{
    var created = new dregx_node(this, a, b);
    return created;
}
|
---|
| 1071 |
|
---|
| 1072 | /* (:backref . n) */
|
---|
/* Builds a back_ref_node owned by this parser for the value n. */
back_ref_node new_back_ref(int n)
{
    var created = new back_ref_node(this, n);
    return created;
}
|
---|
| 1077 |
|
---|
| 1078 | /* (:nthref . n) */
|
---|
/* Builds an nth_ref_node owned by this parser for the index n. */
nth_ref_node new_nth_ref(int n)
{
    var created = new nth_ref_node(this, n);
    return created;
}
|
---|
| 1083 |
|
---|
| 1084 | /* (:heredoc . a) */
|
---|
/* Builds an empty heredoc_node owned by this parser. */
heredoc_node new_heredoc()
{
    var created = new heredoc_node(this);
    return created;
}
|
---|
| 1089 |
|
---|
/* Intentionally a no-op: id is accepted and ignored. */
void new_bv(mrb_sym id)
{
    /* nothing to record */
}
|
---|
| 1093 |
|
---|
/* Builds a literal_delim_node owned by this parser. */
literal_delim_node new_literal_delim()
{
    var created = new literal_delim_node(this);
    return created;
}
|
---|
| 1098 |
|
---|
| 1099 | /* (:words . a) */
|
---|
/* Builds a words_node owned by this parser around a. */
words_node new_words(node a)
{
    var created = new words_node(this, a);
    return created;
}
|
---|
| 1104 |
|
---|
| 1105 | /* (:symbols . a) */
|
---|
/* Builds a symbols_node owned by this parser around a. */
symbols_node new_symbols(node a)
{
    var created = new symbols_node(this, a);
    return created;
}
|
---|
| 1110 |
|
---|
/*
 * Builds a filename_node from s. The string is encoded with
 * MrbParser.UTF8StringToArray and the node receives the encoded array together
 * with that array's full Length.
 */
filename_node new_filename(string s)
{
    var encoded = MrbParser.UTF8StringToArray(s);
    var length = encoded.Length;
    return new filename_node(this, encoded, length);
}
|
---|
| 1116 |
|
---|
/* Builds a lineno_node owned by this parser for the line number lineno. */
lineno_node new_lineno(int lineno)
{
    var created = new lineno_node(this, lineno);
    return created;
}
|
---|
| 1121 |
|
---|
| 1122 | /* xxx ----------------------------- */
|
---|
| 1123 |
|
---|
| 1124 | /* (:call a op) */
|
---|
/*
 * Builds a call to the unary operator method m on recv, with no argument list.
 * NOTE(review): the trailing (MrbTokens)1 is an unnamed constant taken over
 * as-is; confirm its meaning against new_call.
 */
node call_uni_op(node recv, string m)
{
    var method = intern_cstr(MrbParser.UTF8StringToArray(m));
    return new_call(recv, method, null, (MrbTokens)1);
}
|
---|
| 1129 |
|
---|
| 1130 | /* (:call a op b) */
|
---|
/*
 * Builds a call to the binary operator method m on recv with the single
 * argument arg1, wrapped as list1(list1(arg1)).
 * NOTE(review): the trailing (MrbTokens)1 is an unnamed constant taken over
 * as-is; confirm its meaning against new_call.
 */
node call_bin_op(node recv, string m, node arg1)
{
    var method = intern_cstr(MrbParser.UTF8StringToArray(m));
    var arguments = list1(list1(arg1));
    return new_call(recv, method, arguments, (MrbTokens)1);
}
|
---|
| 1135 |
|
---|
/*
 * Stores the block b in the cdr slot of the argument cell a.
 * Does nothing when b is null. If the slot is already occupied an error is
 * reported, and the slot is then overwritten anyway.
 */
void args_with_block(node a, node b)
{
    if (b == null) {
        return;
    }

    if (a.cdr != null) {
        yyError("both block arg and actual block given");
    }
    a.cdr = b;
}
|
---|
| 1145 |
|
---|
/*
 * Attaches the block b to the call-like node a by dispatching on the node
 * type stored in a.car. Only super, zsuper, call and fcall nodes accept a
 * block; every other node type is left untouched.
 */
void call_with_block(node a, node b)
{
    var kind = (node_type)a.car;

    if (kind == node_type.NODE_SUPER) {
        ((super_node)a).add_block(b);
    }
    else if (kind == node_type.NODE_ZSUPER) {
        ((zsuper_node)a).add_block(b);
    }
    else if (kind == node_type.NODE_CALL) {
        ((call_node)a).add_block(b);
    }
    else if (kind == node_type.NODE_FCALL) {
        ((fcall_node)a).add_block(b);
    }
}
|
---|
| 1165 |
|
---|
/* Wraps n in a negate_node owned by this parser. */
node negate_lit(node n)
{
    node negated = new negate_node(this, n);
    return negated;
}
|
---|
| 1170 |
|
---|
/* Identity hook for condition expressions: hands n back unchanged. */
static node cond(node n)
{
    /* no rewriting is performed */
    return n;
}
|
---|
| 1175 |
|
---|
/*
 * Turns an argument cell into the value node used by a return-like statement.
 * A non-null cdr is rejected with an error and yields null. A single value is
 * returned as-is; several values are wrapped in an array node.
 */
node ret_args(node n)
{
    if (n.cdr != null) {
        yyError("block argument should not be given");
        return null;
    }

    var values = (node)n.car;
    if (values.cdr != null) {
        return new_array(values);
    }
    return (node)values.car;
}
|
---|
| 1185 |
|
---|
/* Registers the name of lhs through local_add when lhs is an lvar_node; other nodes are ignored. */
void assignable(node lhs)
{
    if (lhs is lvar_node) {
        local_add(((lvar_node)lhs).name);
    }
}
|
---|
| 1193 |
|
---|
/*
 * Resolves a bare identifier reference. An lvar_node whose name fails
 * local_var_p is replaced by an argument-less fcall of the same name; every
 * other node is returned unchanged.
 */
node var_reference(node lhs)
{
    var local = lhs as lvar_node;
    if (local == null || local_var_p(local.name)) {
        return lhs;
    }

    node call = new_fcall(local.name, null);
    return call;
}
|
---|
| 1208 |
|
---|
/*
 * Builds the string-terminator record (type . (0 . (paren . term))).
 * The 0 in second position is the nesting level that parse_string reads back
 * and updates.
 */
node new_strterm(mrb_string_type type, int term, int paren)
{
    var delimiters = cons(paren, term);
    var withLevel = cons(0, delimiters);
    return cons(type, withLevel);
}
|
---|
| 1213 |
|
---|
/* Drops the active string-terminator record by setting lex_strterm to null. */
void end_strterm()
{
    lex_strterm = null;
}
|
---|
| 1218 |
|
---|
/*
 * Returns the info record of the heredoc at the head of parsing_heredoc, or
 * null when no heredoc is being parsed. The head's car is cast to
 * heredoc_node without a prior type check.
 */
parser_heredoc_info parsing_heredoc_inf()
{
    node current = this.parsing_heredoc;
    if (current != null) {
        /* mrb_assert(current.car.car == node_type.NODE_HEREDOC); */
        return ((heredoc_node)current.car).info;
    }
    return null;
}
|
---|
| 1227 |
|
---|
/*
 * Moves the heredocs queued in heredocs_from_nextline into the all_heredocs
 * list and makes the first of them the one being parsed.
 *
 * - No heredoc in progress: the current lex_strterm is saved, a fresh one is
 *   installed for the first queued heredoc, and the queue is appended to the
 *   end of all_heredocs.
 * - A heredoc already in progress: the queue is spliced into all_heredocs
 *   immediately in front of the current heredoc.
 *
 * The queue is cleared afterwards. Does nothing when the queue is empty.
 */
void heredoc_treat_nextline()
{
    if (this.heredocs_from_nextline == null) {
        return;
    }

    if (this.parsing_heredoc != null) {
        /* find the last cell of the queued list */
        node queueTail = this.heredocs_from_nextline;
        while (queueTail.cdr != null) {
            queueTail = (node)queueTail.cdr;
        }

        node cursor = this.all_heredocs;
        mrb_assert(cursor != null);
        if (cursor == this.parsing_heredoc) {
            /* current heredoc is the list head: the queue becomes the new head */
            queueTail.cdr = cursor;
            this.all_heredocs = this.heredocs_from_nextline;
        }
        else {
            /* walk to the cell just before the current heredoc */
            while (cursor.cdr != this.parsing_heredoc) {
                cursor = (node)cursor.cdr;
                mrb_assert(cursor != null);
            }
            queueTail.cdr = cursor.cdr;
            cursor.cdr = this.heredocs_from_nextline;
        }
        this.parsing_heredoc = this.heredocs_from_nextline;
    }
    else {
        this.parsing_heredoc = this.heredocs_from_nextline;
        this.lex_strterm_before_heredoc = this.lex_strterm;
        this.lex_strterm = new_strterm(parsing_heredoc_inf().type, 0, 0);

        node last = this.all_heredocs;
        if (last == null) {
            this.all_heredocs = this.parsing_heredoc;
        }
        else {
            while (last.cdr != null) {
                last = (node)last.cdr;
            }
            last.cdr = this.parsing_heredoc;
        }
    }

    this.heredocs_from_nextline = null;
}
|
---|
| 1271 |
|
---|
/*
 * Finishes the heredoc being parsed and advances to the next one in the list.
 * When another heredoc follows, only the type slot of lex_strterm is updated.
 * When none follows, the lexer returns to EXPR_BEG at command start and the
 * lex_strterm saved before the heredoc is restored.
 */
void heredoc_end()
{
    this.parsing_heredoc = (node)this.parsing_heredoc.cdr;

    if (this.parsing_heredoc != null) {
        /* next heredoc */
        this.lex_strterm.car = parsing_heredoc_inf().type;
        return;
    }

    this.lstate = mrb_lex_state_enum.EXPR_BEG;
    this.cmd_start = true;
    end_strterm();
    this.lex_strterm = (node)this.lex_strterm_before_heredoc;
    this.lex_strterm_before_heredoc = null;
}
|
---|
| 1287 |
|
---|
/* Reports whether the type stored in lex_strterm.car shares at least one bit with str_func. */
bool is_strterm_type(mrb_string_type str_func)
{
    int current = (int)lex_strterm.car;
    int wanted = (int)str_func;
    return (current & wanted) != 0;
}
|
---|
| 1292 |
|
---|
/*
 * Pre-encoded byte patterns "begin" and "\n=end".
 * NOTE(review): presumably the markers of an embedded =begin ... =end
 * document; their users are not visible from this part of the file.
 */
static Uint8Array begin = MrbParser.UTF8StringToArray("begin"),
                  end = MrbParser.UTF8StringToArray("\n=end");
|
---|
| 1295 |
|
---|
/*
 * Returns c to the front of the push-back list so the next nextc() yields it
 * again. The column counter is rewound only for real characters (c >= 0);
 * negative end-of-input markers leave it untouched.
 */
void pushback(int c)
{
    bool isRealChar = c >= 0;
    if (isRealChar) {
        this.column--;
    }
    this.pb = cons(c, this.pb);
}
|
---|
| 1303 |
|
---|
/*
 * Reports an attempt to assign to a regexp reference variable. The message
 * shows n.cdr as a number for NODE_NTH_REF and as a character for
 * NODE_BACK_REF. Any other node type is silently ignored here.
 */
void backref_error(node n)
{
    switch ((node_type)n.car) {
    case node_type.NODE_NTH_REF:
        yyError("can't set variable ${0}", ((int)n.cdr).ToString());
        break;
    case node_type.NODE_BACK_REF:
        yyError("can't set variable ${0}", ((char)n.cdr).ToString());
        break;
    default:
        //mrb_bug(mrb, "Internal error in backref_error() : n=>car == %S", mrb_fixnum_value(c));
        break;
    }
}
|
---|
| 1318 |
|
---|
/*
 * Reports "void value expression" when n is a break, return, next, redo or
 * retry node. A null n and every other node type pass silently.
 */
void void_expr_error(node n)
{
    if (n == null) {
        return;
    }

    var kind = (node_type)n.car;
    bool yieldsNoValue =
        kind == node_type.NODE_BREAK ||
        kind == node_type.NODE_RETURN ||
        kind == node_type.NODE_NEXT ||
        kind == node_type.NODE_REDO ||
        kind == node_type.NODE_RETRY;

    if (yieldsNoValue) {
        yyError("void value expression");
    }
}
|
---|
| 1334 |
|
---|
/*
 * Reads the next input character.
 *
 * Pushed-back characters are served first, then bytes of the in-memory source
 * buffer s. A "\r\n" pair is collapsed into a single '\n'; a lone '\r' is
 * returned as '\r'. The column counter advances for every non-negative
 * character obtained.
 *
 * Returns -1 at the end of all input, or -2 when the source buffer is
 * exhausted but partial_hook reports (with a non-negative result) that more
 * input follows.
 */
int nextc()
{
    int ch;

    if (this.pb != null) {
        ch = (int)this.pb.car;
        this.pb = (node)this.pb.cdr;
    }
    else if (this.s != null && this.sp < this.s.Length) {
        ch = (byte)this.s[sp++];
    }
    else {
        /* source buffer exhausted */
        if (this.partial_hook == null) {
            return -1;
        }
        if (this.partial_hook(this) < 0) {
            return -1; /* end of program(s) */
        }
        return -2; /* end of a file in the program files */
    }

    if (ch >= 0) {
        this.column++;
    }
    if (ch != '\r') {
        return ch;
    }

    /* '\r': look at the following character to fold "\r\n" into '\n' */
    int following = nextc();
    if (following == '\n') {
        return following;
    }
    pushback(following);
    return '\r';
}
|
---|
| 1385 |
|
---|
/* Consumes input up to and including the first occurrence of term, or until nextc() turns negative. */
void skip(char term)
{
    while (true) {
        int ch = nextc();
        if (ch < 0 || ch == term) {
            return;
        }
    }
}
|
---|
| 1396 |
|
---|
/*
 * Looks n characters ahead (n == 0 is the very next character) without
 * consuming input: n + 1 characters are read and then put back in front of
 * the push-back list, with the column counter restored.
 *
 * If a read yields -1 the method returns -1 immediately; characters read
 * before that point are not put back.
 */
int peekc_n(int n)
{
    node consumed = null;
    int ch;
    int remaining = n;

    while (true) {
        ch = nextc();
        if (ch == -1) return ch; /* do not skip partial EOF */
        if (ch >= 0) --column;
        consumed = push(consumed, ch);
        if (remaining-- == 0) {
            break;
        }
    }

    if (this.pb == null) {
        this.pb = consumed;
    }
    else {
        this.pb = append(consumed, this.pb);
    }
    return ch;
}
|
---|
| 1416 |
|
---|
/* True when the character n positions ahead equals c and c is a real (non-negative) character. */
bool peek_n(int c, int n)
{
    int ahead = peekc_n(n);
    return ahead == c && c >= 0;
}
|
---|
| 1421 |
|
---|
/* True when the very next character equals c; shorthand for peek_n(c, 0). */
bool peek(int c)
{
    const int next = 0;
    return peek_n(c, next);
}
|
---|
| 1426 |
|
---|
/*
 * Checks whether the unread part of the source buffer starts with the
 * pattern s, taken from offset p for strlen(s, p) bytes. Only the source
 * buffer is compared; the push-back list is not consulted. Nothing is
 * consumed.
 */
bool peeks(Uint8Array s, int p)
{
    int len = strlen(s, p);

    if (this.s == null) {
        return false;
    }
    if (this.sp + len > this.s.Length) {
        return false;
    }
    return memcmp(this.s, this.sp, s, p, len) == 0;
}
|
---|
| 1445 |
|
---|
/*
 * Consumes input up to and including the first occurrence of the pattern s
 * (taken from offset p), keeping lineno/column in step with every newline
 * that is skipped.
 *
 * Returns true once the pattern has been consumed, false when the input ends
 * (nextc() < 0) before the pattern is found. The previous code returned
 * `c != 0` on that path, which is always true for a negative c, so callers
 * could not detect an unterminated section.
 */
bool skips(Uint8Array s, int p)
{
    int c;

    for (;;) {
        /* skip until the first character of the pattern */
        for (;;) {
            c = nextc();
            if (c < 0) return false; /* input ended before the pattern */
            if (c == '\n') {
                this.lineno++;
                this.column = 0;
            }
            if (c == s[p]) break;
        }

        /* first character matched: test the remainder against the buffer */
        p++;
        if (peeks(s, p)) {
            int len = strlen(s, p);

            while (len-- != 0) {
                if (nextc() == '\n') {
                    this.lineno++;
                    this.column = 0;
                }
            }
            return true;
        }
        else {
            /* false start: rewind the pattern offset and keep scanning */
            p--;
        }
    }
}
|
---|
| 1478 |
|
---|
/*
 * Starts a new token: switches back to the fixed buffer buf if a grown
 * buffer was in use, resets the write index, and returns column - 1.
 */
int newtok()
{
    bool usingGrownBuffer = this.tokbuf != this.buf;
    if (usingGrownBuffer) {
        //delete this.tokbuf;
        this.tokbuf = this.buf;
        this.tsiz = MRB_PARSER_TOKBUF_SIZE;
    }

    this.tidx = 0;
    return this.column - 1;
}
|
---|
| 1489 |
|
---|
/*
 * Appends one character to the token buffer.
 *
 * c >= 0 is stored as a single raw byte. c < 0 carries a Unicode code point
 * as -c and is stored as its 1 to 4 byte UTF-8 encoding.
 *
 * The buffer doubles when the bytes would not fit. Once its size has reached
 * MRB_PARSER_TOKBUF_MAX nothing more is stored, but tidx still advances.
 */
void tokadd(int c)
{
    var encoded = new Uint8Array(4);
    int count;

    /* mrb_assert(-0x10FFFF <= c && c <= 0xFF); */
    if (c >= 0) {
        /* single byte from source or non-Unicode escape */
        encoded[0] = (byte)c;
        count = 1;
    }
    else {
        /* Unicode character */
        int cp = -c;
        if (cp < 0x80) {
            encoded[0] = (byte)cp;
            count = 1;
        }
        else if (cp < 0x800) {
            encoded[0] = (byte)(0xC0 | (cp >> 6));
            encoded[1] = (byte)(0x80 | (cp & 0x3F));
            count = 2;
        }
        else if (cp < 0x10000) {
            encoded[0] = (byte)(0xE0 | (cp >> 12));
            encoded[1] = (byte)(0x80 | ((cp >> 6) & 0x3F));
            encoded[2] = (byte)(0x80 | (cp & 0x3F));
            count = 3;
        }
        else {
            encoded[0] = (byte)(0xF0 | (cp >> 18));
            encoded[1] = (byte)(0x80 | ((cp >> 12) & 0x3F));
            encoded[2] = (byte)(0x80 | ((cp >> 6) & 0x3F));
            encoded[3] = (byte)(0x80 | (cp & 0x3F));
            count = 4;
        }
    }

    if (this.tidx + count >= this.tsiz) {
        if (this.tsiz >= MRB_PARSER_TOKBUF_MAX) {
            /* at the size cap: count the bytes but drop them */
            this.tidx += count;
            return;
        }

        /* double the capacity and carry the current contents over */
        this.tsiz *= 2;
        var grown = new Uint8Array(this.tsiz);
        grown.Set(this.tokbuf, 0);
        this.tokbuf = grown;
    }

    int i = 0;
    while (i < count) {
        this.tokbuf[this.tidx++] = encoded[i++];
    }
}
|
---|
| 1548 |
|
---|
/* Returns the byte just before the write index; no check is made for an empty token. */
int toklast()
{
    int lastIndex = this.tidx - 1;
    return this.tokbuf[lastIndex];
}
|
---|
| 1553 |
|
---|
/*
 * Terminates the current token with a NUL byte. A token whose index has
 * reached MRB_PARSER_TOKBUF_MAX is cut back to MAX - 1 and an error is
 * reported first.
 */
void tokfix()
{
    bool tooLong = this.tidx >= MRB_PARSER_TOKBUF_MAX;
    if (tooLong) {
        this.tidx = MRB_PARSER_TOKBUF_MAX - 1;
        yyError("string too long (truncated)");
    }

    this.tokbuf[this.tidx] = (byte)'\0';
}
|
---|
| 1562 |
|
---|
/* Returns the buffer holding the current token. This is the live buffer, not a copy. */
Uint8Array tok()
{
    Uint8Array current = this.tokbuf;
    return current;
}
|
---|
| 1567 |
|
---|
/* Returns the number of bytes counted for the current token (the write index). */
int toklen()
{
    int length = this.tidx;
    return length;
}
|
---|
| 1572 |
|
---|
/*
 * Locale-independent ASCII character classification, written as plain range
 * tests. Every predicate is false for negative values such as the
 * end-of-input markers. TOUPPER/TOLOWER change ASCII letters only.
 */
bool ISASCII(int c) { return c >= 0 && c <= 0x7f; }
bool ISPRINT(int c) { return c >= 0x20 && c <= 0x7e; }
bool ISSPACE(int c) { return c == ' ' || (c >= '\t' && c <= '\r'); }
bool ISUPPER(int c) { return c >= 'A' && c <= 'Z'; }
bool ISLOWER(int c) { return c >= 'a' && c <= 'z'; }
bool ISALPHA(int c) { return ISUPPER(c) || ISLOWER(c); }
bool ISDIGIT(int c) { return c >= '0' && c <= '9'; }
bool ISXDIGIT(int c) { return ISDIGIT(c) || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'); }
bool ISALNUM(int c) { return ISALPHA(c) || ISDIGIT(c); }
bool ISBLANK(int c) { return c == ' ' || c == '\t'; }
bool ISCNTRL(int c) { return (c >= 0 && c < 0x20) || c == 0x7f; }
int TOUPPER(int c) { return ISLOWER(c) ? c - 0x20 : c; }
int TOLOWER(int c) { return ISUPPER(c) ? c + 0x20 : c; }
|
---|
| 1586 |
|
---|
/* Lexer-state predicates over this.lstate, plus two look-ahead helpers for labels and spaced arguments. */

/* True in EXPR_ARG or EXPR_CMDARG. */
bool IS_ARG()
{
    var state = this.lstate;
    return state == mrb_lex_state_enum.EXPR_ARG || state == mrb_lex_state_enum.EXPR_CMDARG;
}

/* True in EXPR_END, EXPR_ENDARG or EXPR_ENDFN. */
bool IS_END()
{
    var state = this.lstate;
    return state == mrb_lex_state_enum.EXPR_END
        || state == mrb_lex_state_enum.EXPR_ENDARG
        || state == mrb_lex_state_enum.EXPR_ENDFN;
}

/* True in EXPR_BEG, EXPR_MID, EXPR_VALUE or EXPR_CLASS. */
bool IS_BEG()
{
    var state = this.lstate;
    return state == mrb_lex_state_enum.EXPR_BEG
        || state == mrb_lex_state_enum.EXPR_MID
        || state == mrb_lex_state_enum.EXPR_VALUE
        || state == mrb_lex_state_enum.EXPR_CLASS;
}

/* True in an argument state when whitespace was seen before c and c itself is not whitespace. */
bool IS_SPCARG(int c, bool space_seen)
{
    if (!IS_ARG()) {
        return false;
    }
    return space_seen && !ISSPACE(c);
}

/* True in an argument state, or in EXPR_BEG when cmd_state is false. */
bool IS_LABEL_POSSIBLE(bool cmd_state)
{
    if (this.lstate == mrb_lex_state_enum.EXPR_BEG && !cmd_state) {
        return true;
    }
    return IS_ARG();
}

/* True when the character n positions ahead is ':' and the one after it is not. */
bool IS_LABEL_SUFFIX(int n)
{
    if (!peek_n(':', n)) {
        return false;
    }
    return !peek_n(':', n + 1);
}
|
---|
| 1593 |
|
---|
/*
 * Parses up to len leading octal digits from start.
 * Returns the accumulated value; retlen receives the number of digits
 * consumed. Scanning stops at the first element outside '0'..'7'.
 */
static int scan_oct(int[] start, int len, ref int retlen)
{
    int pos = 0;
    int value = 0;

    /* mrb_assert(len <= 3) */
    for (int budget = len; budget != 0; budget--) {
        int ch = start[pos];
        if (ch < '0' || ch > '7') {
            break;
        }
        value = (value << 3) | (ch - '0');
        pos++;
    }

    retlen = pos;
    return value;
}
|
---|
| 1608 |
|
---|
/*
 * Parses up to len leading hexadecimal digits (either case) from start.
 * Returns the accumulated value; retlen receives the number of digits
 * consumed. Scanning stops at the first element that is not a hex digit,
 * which includes 0.
 *
 * Digits are converted arithmetically. The previous code re-encoded a
 * 32-character lookup string on every call and searched it per digit.
 */
static int scan_hex(int[] start, int len, ref int retlen)
{
    int pos = 0;
    int value = 0;

    /* mrb_assert(len <= 8) */
    while (len-- != 0) {
        int ch = start[pos];
        int digit;

        if (ch >= '0' && ch <= '9') {
            digit = ch - '0';
        }
        else if (ch >= 'a' && ch <= 'f') {
            digit = ch - 'a' + 10;
        }
        else if (ch >= 'A' && ch <= 'F') {
            digit = ch - 'A' + 10;
        }
        else {
            break; /* not a hex digit: end of the number */
        }

        value = (value << 4) | digit;
        pos++;
    }

    retlen = pos;
    return value;
}
|
---|
| 1626 |
|
---|
/*
 * Reads the hexadecimal digits of a \u escape (at most limit of them) and
 * returns the code point.
 *
 * Returns -1 after reporting an error when
 *  - the input ends before or in the middle of the digits,
 *  - no hex digit is present, or
 *  - the value is not a valid Unicode scalar (above 0x10FFFF or in the
 *    surrogate range D800..DFFF).
 *
 * The first non-digit character read is pushed back for the caller.
 *
 * Fix: end of input inside the digit run used to leave only the inner loop
 * and was then accepted as a shortened code point. It is now treated as a
 * syntax error, using the same flag pattern read_escape uses for its octal
 * and hex forms.
 */
int read_escape_unicode(int limit)
{
    int[] buf = new int[9];
    int i = 0;
    bool eof = false;

    buf[0] = nextc();
    if (buf[0] < 0) {
        eof = true;
    }
    else if (ISXDIGIT(buf[0])) {
        /* \uxxxx form */
        for (i = 1; i < limit; i++) {
            buf[i] = nextc();
            if (buf[i] < 0) {
                eof = true;
                break;
            }
            if (!ISXDIGIT(buf[i])) {
                pushback(buf[i]);
                break;
            }
        }
    }
    else {
        pushback(buf[0]);
    }

    int c = 0;
    if (!eof) {
        /* i is both the digit count passed in and the count consumed on return */
        c = scan_hex(buf, i, ref i);
    }
    if (eof || i == 0) {
        yyError("Invalid escape character syntax");
        return -1;
    }

    if (c < 0 || c > 0x10FFFF || (c & 0xFFFFF800) == 0xD800) {
        yyError("Invalid Unicode code point");
        return -1;
    }
    return c;
}
|
---|
| 1666 |
|
---|
| 1667 | /* Return negative to indicate Unicode code point */
|
---|
/*
 * Decodes the escape sequence that follows a backslash and returns its value.
 *
 * Returns
 *  - a non-negative byte value for ordinary, octal, hex, meta (\M-) and
 *    control (\C-, \c) escapes;
 *  - the NEGATED code point for \u escapes, so callers can tell Unicode
 *    results from raw bytes;
 *  - 0 after reporting an error for malformed or truncated sequences.
 *
 * Fix: \e and \c? were written with C octal literals (033, 0177). C# has no
 * octal literal form and reads them as decimal 33 and 177, so they are now
 * spelled in hex as 0x1b (ESC) and 0x7f (DEL).
 */
int read_escape()
{
    int c;

    switch (c = nextc()) {
    case '\\': /* backslash */
        return c;

    case 'n': /* newline */
        return '\n';

    case 't': /* horizontal tab */
        return '\t';

    case 'r': /* carriage return */
        return '\r';

    case 'f': /* form feed */
        return '\f';

    case 'v': /* vertical tab */
        return '\v';

    case 'a': /* alarm (bell) */
        return '\x07';

    case 'e': /* escape */
        return 0x1b;

    case '0':
    case '1':
    case '2':
    case '3': /* octal constant */
    case '4':
    case '5':
    case '6':
    case '7': {
            int[] buf = new int[3];
            int i;

            bool error = false;
            buf[0] = c;
            for (i = 1; i < 3; i++) {
                buf[i] = nextc();
                if (buf[i] < 0) {
                    error = true;
                    break;
                }
                if (buf[i] < '0' || '7' < buf[i]) {
                    pushback(buf[i]);
                    break;
                }
            }
            if (error)
                break; /* input ended inside the escape: fall to the error exit */
            c = scan_oct(buf, i, ref i);
        }
        return c;

    case 'x': /* hex constant */
        {
            int[] buf = new int[2];
            int i;

            bool error = false;
            for (i = 0; i < 2; i++) {
                buf[i] = nextc();
                if (buf[i] < 0) {
                    error = true;
                    break;
                }
                if (!ISXDIGIT(buf[i])) {
                    pushback(buf[i]);
                    break;
                }
            }
            if (error)
                break; /* input ended inside the escape: fall to the error exit */
            c = scan_hex(buf, i, ref i);
            if (i == 0) {
                yyError("Invalid escape character syntax");
                return 0;
            }
        }
        return c;

    case 'u': /* Unicode */
        if (peek('{')) {
            /* \u{xxxxxxxx} form */
            nextc();
            c = read_escape_unicode(8);
            if (c < 0) return 0;
            if (nextc() != '}') break;
        }
        else {
            c = read_escape_unicode(4);
            if (c < 0) return 0;
        }
        return -c; /* negative marks a Unicode code point */

    case 'b': /* backspace */
        return '\b';

    case 's': /* space */
        return ' ';

    case 'M':
        if ((c = nextc()) != '-') {
            yyError("Invalid escape character syntax");
            pushback(c);
            return '\0';
        }
        if ((c = nextc()) == '\\') {
            return read_escape() | 0x80;
        }
        else if (c < 0) break;
        else {
            return ((c & 0xff) | 0x80);
        }

    case 'C':
    case 'c':
        if (c == 'C') {
            if ((c = nextc()) != '-') {
                yyError("Invalid escape character syntax");
                pushback(c);
                return '\0';
            }
        }
        if ((c = nextc()) == '\\') {
            c = read_escape();
        }
        else if (c == '?')
            return 0x7f; /* DEL */
        else if (c < 0) break;
        return c & 0x9f;

    case -1:
    case -2: /* end of a file */
        break;

    default:
        return c;
    }

    yyError("Invalid escape character syntax");
    return '\0';
}
|
---|
| 1815 |
|
---|
/*
** Lexes the body of the literal currently described by this.lex_strterm
** (string, heredoc, word/symbol list, xstring or regexp) and returns the
** next token for it.
**
** Possible results, as produced below:
**   tHEREDOC_END / tHD_STRING_MID / tHD_STRING_PART   while inside a heredoc
**   tSTRING_PART                                      before a "#{" interpolation
**   tLITERAL_DELIM / tHD_LITERAL_DELIM / tSTRING_MID  inside array-style literals
**   tXSTRING, tREGEXP, tLABEL_END, tSTRING            when the terminator is reached
**   0                                                 after an error was reported
**
** The scanned text is handed over through yylval.nd.
*/
MrbTokens parse_string()
{
    int c;
    /* lex_strterm is read as a cons list: (type nest_level beg . end) */
    var type = (mrb_string_type)this.lex_strterm.car;
    var nest_level = (int)((node)this.lex_strterm.cdr).car;
    var beg = (int)((node)((node)this.lex_strterm.cdr).cdr).car;
    var end = (int)((node)((node)this.lex_strterm.cdr).cdr).cdr;
    var hinf = (type & mrb_string_type.STR_FUNC_HEREDOC) != 0 ? parsing_heredoc_inf() : null;
    var cmd_state = this.cmd_start;

    /* A delimiter of 0 is replaced by -3 so it is never compared against a
       real character (presumably nextc() never yields -3; the end-of-input
       codes used in this file are -1 and -2). */
    if (beg == 0) beg = -3;
    if (end == 0) end = -3;
    newtok();
    while ((c = nextc()) != end || nest_level != 0) {
        if (hinf != null && (c == '\n' || c < 0)) {
            /* heredoc: a physical line (or the input) ended */
            bool line_head;
            tokadd('\n');
            tokfix();
            this.lineno++;
            this.column = 0;
            line_head = hinf.line_head;
            hinf.line_head = true;
            if (line_head) {
                /* check whether end of heredoc */
                Uint8Array s = tok();
                int p = 0;
                int len = toklen();
                if (hinf.allow_indent) {
                    /* test the remaining length first so s[p] is never read past the token */
                    while (len > 0 && ISSPACE(s[p])) {
                        ++p;
                        --len;
                    }
                }
                /* len - 1: the token ends with the '\n' appended above */
                if ((len - 1 == hinf.term_len) && (strncmp(s, p, hinf.term, 0, len - 1) == 0)) {
                    if (c < 0) {
                        parsing_heredoc = null;
                    }
                    else {
                        return MrbTokens.tHEREDOC_END;
                    }
                }
            }
            if (c < 0) {
                yyError("can't find heredoc delimiter \"{0}\" anywhere before EOF", MrbParser.UTF8ArrayToString(hinf.term, 0));
                return 0;
            }
            yylval.nd = new_str(tok(), toklen());
            return MrbTokens.tHD_STRING_MID;
        }
        if (c < 0) {
            yyError("unterminated string meets end of file");
            return 0;
        }
        else if (c == beg) {
            /* nested opening bracket: remember the depth in lex_strterm too */
            nest_level++;
            ((node)this.lex_strterm.cdr).car = nest_level;
        }
        else if (c == end) {
            nest_level--;
            ((node)this.lex_strterm.cdr).car = nest_level;
        }
        else if (c == '\\') {
            c = nextc();
            if ((type & mrb_string_type.STR_FUNC_EXPAND) != 0) {
                if (c == end || c == beg) {
                    /* escaped delimiter: keep the delimiter itself */
                    tokadd(c);
                }
                else if (c == '\n') {
                    /* backslash-newline: dropped, except in array-style literals */
                    this.lineno++;
                    this.column = 0;
                    if ((type & mrb_string_type.STR_FUNC_ARRAY) != 0) {
                        tokadd('\n');
                    }
                }
                else if ((type & mrb_string_type.STR_FUNC_REGEXP) != 0) {
                    /* regexp: pass the escape through untouched */
                    tokadd('\\');
                    tokadd(c);
                }
                else if (c == 'u' && peek('{')) {
                    /* \u{xxxx xxxx xxxx} form */
                    nextc();
                    while (true) {
                        do c = nextc(); while (ISSPACE(c));
                        if (c == '}') break;
                        pushback(c);
                        c = read_escape_unicode(8);
                        if (c < 0) break;
                        /* the code point is passed negated to tokadd */
                        tokadd(-c);
                    }
                    if (hinf != null)
                        hinf.line_head = false;
                }
                else {
                    pushback(c);
                    tokadd(read_escape());
                    if (hinf != null)
                        hinf.line_head = false;
                }
            }
            else {
                /* non-expanding literal: the backslash is kept unless it escapes
                   a delimiter, another backslash, or (array-style) whitespace */
                if (c != beg && c != end) {
                    if (c == '\n') {
                        this.lineno++;
                        this.column = 0;
                    }
                    if (!(c == '\\' || ((type & mrb_string_type.STR_FUNC_ARRAY) != 0 && ISSPACE(c)))) {
                        tokadd('\\');
                    }
                }
                tokadd(c);
            }
            continue;
        }
        else if ((c == '#') && (type & mrb_string_type.STR_FUNC_EXPAND) != 0) {
            c = nextc();
            if (c == '{') {
                /* "#{": return the text scanned so far and switch to expression lexing */
                tokfix();
                this.lstate = mrb_lex_state_enum.EXPR_BEG;
                this.cmd_start = true;
                yylval.nd = new_str(tok(), toklen());
                if (hinf != null) {
                    hinf.line_head = false;
                    return MrbTokens.tHD_STRING_PART;
                }
                return MrbTokens.tSTRING_PART;
            }
            tokadd('#');
            pushback(c);
            continue;
        }
        if ((type & mrb_string_type.STR_FUNC_ARRAY) != 0 && ISSPACE(c)) {
            if (toklen() == 0) {
                /* whitespace between elements: swallow the whole run */
                do {
                    if (c == '\n') {
                        this.lineno++;
                        this.column = 0;
                        heredoc_treat_nextline();
                        if (this.parsing_heredoc != null) {
                            return MrbTokens.tHD_LITERAL_DELIM;
                        }
                    }
                    c = nextc();
                } while (ISSPACE(c));
                pushback(c);
                return MrbTokens.tLITERAL_DELIM;
            }
            else {
                /* whitespace ends the current element */
                pushback(c);
                tokfix();
                yylval.nd = new_str(tok(), toklen());
                return MrbTokens.tSTRING_MID;
            }
        }
        tokadd(c);
    }

    /* terminator reached */
    tokfix();
    this.lstate = mrb_lex_state_enum.EXPR_END;
    end_strterm();

    if ((type & mrb_string_type.STR_FUNC_XQUOTE) != 0) {
        yylval.nd = new_xstr(tok(), toklen());
        return MrbTokens.tXSTRING;
    }

    if ((type & mrb_string_type.STR_FUNC_REGEXP) != 0) {
        int f = 0;
        int re_opt;
        Uint8Array s = strndup(tok(), 0, toklen());
        Uint8Array flags = new Uint8Array(3);
        int flag = 0;
        var enc = (byte)'\0';
        Uint8Array encp;
        Uint8Array dup;

        /* collect the option letters that follow the closing delimiter;
           unknown letters are gathered in the token buffer for the error */
        newtok();
        while ((re_opt = nextc()) >= 0 && ISALPHA(re_opt)) {
            switch (re_opt) {
            case 'i': f |= 1; break;
            case 'x': f |= 2; break;
            case 'm': f |= 4; break;
            case 'u': f |= 16; break;
            case 'n': f |= 32; break;
            default: tokadd(re_opt); break;
            }
        }
        pushback(re_opt);
        if (toklen() != 0) {
            tokfix();
            /* indexed placeholders, like every other formatted message in this file */
            yyError("unknown regexp option{0} - {1}", toklen() > 1 ? "s" : "", MrbParser.UTF8ArrayToString(tok().SubArray(0, toklen() + 1), 0));
        }
        if (f != 0) {
            if ((f & 1) != 0) flags[flag++] = (byte)'i';
            if ((f & 2) != 0) flags[flag++] = (byte)'x';
            if ((f & 4) != 0) flags[flag++] = (byte)'m';
            if ((f & 16) != 0) enc = (byte)'u';
            /* 'n' is tested last, so it wins over 'u' when both are given */
            if ((f & 32) != 0) enc = (byte)'n';
        }
        if (flag > 0) {
            dup = strndup(flags, 0, flag);
        }
        else {
            dup = null;
        }
        if (enc != 0) {
            encp = strndup(new Uint8Array(new byte[] { enc }), 0, 1);
        }
        else {
            encp = null;
        }
        yylval.nd = new_regx(s, dup, encp);

        return MrbTokens.tREGEXP;
    }
    yylval.nd = new_str(tok(), toklen());
    if (IS_LABEL_POSSIBLE(cmd_state)) {
        if (IS_LABEL_SUFFIX(0)) {
            /* the literal is followed by a label suffix: consume one more character */
            this.lstate = mrb_lex_state_enum.EXPR_BEG;
            nextc();
            return MrbTokens.tLABEL_END;
        }
    }
    return MrbTokens.tSTRING;
}
| 2040 |
|
---|
/*
** Reads a heredoc identifier: an optional '-', then either a quoted
** ('...' or "...") or a bare identifier. On success a new heredoc node is
** queued on heredocs_from_nextline and tHEREDOC_BEG is returned; otherwise
** 0 is returned (with the consumed characters pushed back where the code
** below does so).
*/
MrbTokens heredoc_identifier()
{
    bool allowIndent = false;
    bool singleQuoted = false;
    int ch = nextc();

    if (ISSPACE(ch) || ch == '=') {
        pushback(ch);
        return 0;
    }
    if (ch == '-') {
        allowIndent = true;
        ch = nextc();
    }

    if (ch == '\'' || ch == '"') {
        /* quoted identifier: everything up to the matching quote on this line */
        int closer = ch;
        singleQuoted = (closer == '\'');
        newtok();
        for (;;) {
            ch = nextc();
            if (ch < 0 || ch == closer) {
                break;
            }
            if (ch == '\n') {
                /* a line break inside the quotes counts as unterminated */
                ch = -1;
                break;
            }
            tokadd(ch);
        }
        if (ch < 0) {
            yyError("unterminated here document identifier");
            return 0;
        }
    }
    else if (ch < 0) {
        /* missing here document identifier */
        return 0;
    }
    else if (!identchar(ch)) {
        pushback(ch);
        if (allowIndent) {
            pushback('-');
        }
        return 0;
    }
    else {
        /* bare identifier */
        newtok();
        tokadd(ch);
        while ((ch = nextc()) >= 0 && identchar(ch)) {
            tokadd(ch);
        }
        pushback(ch);
    }

    tokfix();

    /* only the single-quoted form leaves STR_FUNC_EXPAND off */
    mrb_string_type bodyType = mrb_string_type.str_heredoc;
    if (!singleQuoted) {
        bodyType |= mrb_string_type.STR_FUNC_EXPAND;
    }

    heredoc_node created = new_heredoc();
    parser_heredoc_info details = created.info;
    details.term = strndup(tok(), 0, toklen());
    details.term_len = toklen();
    details.type = bodyType;
    details.allow_indent = allowIndent;
    details.line_head = true;
    details.claer_doc();

    this.heredocs_from_nextline = push(this.heredocs_from_nextline, created);
    this.lstate = mrb_lex_state_enum.EXPR_END;
    yylval.nd = created;
    return MrbTokens.tHEREDOC_BEG;
}
| 2108 |
|
---|
/*
** Entry point for numeric literals. `c` is the first character of the
** number (a sign or a digit). Literals starting with '0' are handled here
** (0x / 0b / 0d / 0o prefixes, plain octal, a lone zero); everything else
** is delegated to invalid_octal(), which also scans ordinary decimal and
** float literals.
*/
MrbTokens start_num(int c)
{
    int nondigit = 0;

    this.lstate = mrb_lex_state_enum.EXPR_END;
    newtok();
    if (c == '-' || c == '+') {
        tokadd(c);
        c = nextc();
    }
    if (c != '0') {
        return invalid_octal(c, nondigit);
    }

    /* token length before any digit is stored; used to detect "no digits" */
    int start = toklen();
    c = nextc();
    switch (c) {
    case 'x':
    case 'X':
        /* hexadecimal */
        return scan_prefixed_int(16, start);
    case 'b':
    case 'B':
        /* binary */
        return scan_prefixed_int(2, start);
    case 'd':
    case 'D':
        /* decimal */
        return scan_prefixed_int(10, start);
    }

    if (c == 'o' || c == 'O') {
        /* prefixed octal */
        c = nextc();
        if (c < 0 || c == '_' || !ISDIGIT(c)) {
            yyError("numeric literal without digits");
            return 0;
        }
    }
    if (c == '_' /* 0_0 */ || (c >= '0' && c <= '7') /* octal */) {
        for (;;) {
            if (c == '_') {
                if (nondigit != 0) {
                    break;
                }
                nondigit = c;
            }
            else if (c < '0' || c > '9') {
                break;
            }
            else if (c > '7') {
                yyError("Invalid octal digit");
                return invalid_octal(c, nondigit);
            }
            else {
                nondigit = 0;
                tokadd(c);
            }
            c = nextc();
            if (c < 0) {
                break;
            }
        }

        if (toklen() > start) {
            pushback(c);
            tokfix();
            if (nondigit != 0) {
                return trailing_uc(nondigit, 0);
            }
            yylval.nd = new_int(tok(), 8);
            return MrbTokens.tINTEGER;
        }
        if (nondigit != 0) {
            pushback(c);
            return trailing_uc(nondigit, 0);
        }
    }

    if (c > '7' && c <= '9') {
        yyError("Invalid octal digit");
        return invalid_octal(c, nondigit);
    }
    if (c == '.' || c == 'e' || c == 'E') {
        /* "0." / "0e": keep the leading zero and continue as a float */
        tokadd('0');
        return invalid_octal(c, nondigit);
    }

    /* a lone zero */
    pushback(c);
    yylval.nd = new_int("0", 10);
    return MrbTokens.tINTEGER;
}

/* True when `c` is a digit of a radix-prefixed literal (radix 16, 2 or 10). */
bool is_digit_of_radix(int c, int radix)
{
    switch (radix) {
    case 16:
        return c >= 0 && ISXDIGIT(c);
    case 2:
        return c == '0' || c == '1';
    default:
        return c >= 0 && ISDIGIT(c);
    }
}

/*
** Scans the digits following a 0x / 0b / 0d prefix. `start` is the token
** length before the digits; single '_' separators are skipped and hex
** digits are stored lower-cased.
*/
MrbTokens scan_prefixed_int(int radix, int start)
{
    int nondigit = 0;
    int c = nextc();

    if (is_digit_of_radix(c, radix)) {
        for (;;) {
            if (c == '_') {
                if (nondigit != 0) {
                    break;
                }
                nondigit = c;
            }
            else if (!is_digit_of_radix(c, radix)) {
                break;
            }
            else {
                nondigit = 0;
                if (radix == 16) {
                    tokadd(tolower(c));
                }
                else {
                    tokadd(c);
                }
            }
            c = nextc();
            if (c < 0) {
                break;
            }
        }
    }
    pushback(c);
    tokfix();
    if (toklen() == start) {
        yyError("numeric literal without digits");
        return 0;
    }
    if (nondigit != 0) {
        return trailing_uc(nondigit, 0);
    }
    yylval.nd = new_int(tok(), radix);
    return MrbTokens.tINTEGER;
}
| 2253 |
|
---|
/*
** Scans the remainder of a decimal / floating point literal, starting with
** the already-read character `c`. `nondigit` carries the last non-digit
** character seen ('_', 'e', sign) or 0. Scanning stops at the first
** character that cannot continue the number; the result is produced by
** trailing_uc().
*/
MrbTokens invalid_octal(int c, int nondigit)
{
    int floatMarks = 0;          /* non-zero once a '.' or exponent was accepted */
    bool sawPoint = false;
    bool sawExponent = false;

    while (true) {
        if (c >= '0' && c <= '9') {
            nondigit = 0;
            tokadd(c);
        }
        else if (c == '.') {
            if (nondigit != 0) {
                return trailing_uc(nondigit, floatMarks);
            }
            if (sawPoint || sawExponent) {
                pushback(c);
                return trailing_uc(nondigit, floatMarks);
            }
            /* the '.' belongs to the number only when a digit follows it */
            int lookahead = nextc();
            if (lookahead < 0 || !ISDIGIT(lookahead)) {
                pushback(lookahead);
                pushback(c);
                return trailing_uc(nondigit, floatMarks);
            }
            c = lookahead;
            tokadd('.');
            tokadd(c);
            floatMarks++;
            sawPoint = true;
            nondigit = 0;
        }
        else if (c == 'e' || c == 'E') {
            if (nondigit != 0) {
                pushback(c);
                c = nondigit;
                pushback(c);
                return trailing_uc(nondigit, floatMarks);
            }
            if (sawExponent) {
                pushback(c);
                return trailing_uc(nondigit, floatMarks);
            }
            tokadd(c);
            sawExponent = true;
            floatMarks++;
            nondigit = c;
            c = nextc();
            if (c != '-' && c != '+') {
                /* no sign: re-dispatch on the character just read */
                continue;
            }
            tokadd(c);
            nondigit = c;
        }
        else if (c == '_') {
            /* '_' in number just ignored */
            if (nondigit != 0) {
                pushback(c);
                return trailing_uc(nondigit, floatMarks);
            }
            nondigit = c;
        }
        else {
            pushback(c);
            return trailing_uc(nondigit, floatMarks);
        }
        c = nextc();
    }
}
| 2336 |
|
---|
/*
** Finishes a numeric literal whose text is in the token buffer.
**
** nondigit: last non-digit character left dangling at the end of the
**           number ('_', 'e', sign, ...), or 0 when there is none; a
**           non-zero value is reported as an error.
** is_float: non-zero when the literal contains a '.' or an exponent.
**
** Returns tFLOAT or tINTEGER with the value node stored in yylval.nd.
*/
MrbTokens trailing_uc(int nondigit, int is_float)
{
    if (nondigit != 0) {
        /* format the offending character itself ('_'), not its numeric code ("95") */
        yyError("trailing '{0}' in number", ((char)nondigit).ToString());
    }

    tokfix();
    if (is_float != 0) {
        double d;
        Uint8Array endp;

        errno = 0;
        d = mrb_float_read(tok(), 0, out endp);
        if (d == 0 && endp == tok()) {
            yyWarning("corrupted float value {0}", MrbParser.UTF8ArrayToString(tok().SubArray(0, toklen() + 1), 0));
        }
        else if (errno == ERANGE) {
            yyWarning("float {0} out of range", MrbParser.UTF8ArrayToString(tok().SubArray(0, toklen() + 1), 0));
            errno = 0;
        }
        /* the node is built from the token text, even after a warning */
        yylval.nd = new_float(tok());
        return MrbTokens.tFLOAT;
    }
    yylval.nd = new_int(tok(), 10);
    return MrbTokens.tINTEGER;
}
| 2363 |
|
---|
/* Emits the "ambiguous first argument" warning; always returns true. */
bool arg_ambiguous()
{
    const string message = "ambiguous first argument; put parentheses or even spaces";

    yyWarning(message);
    return true;
}
| 2369 |
|
---|
/*
** Starts a percent-style literal (the error messages call it "%string").
** `c` is the character after the introducer: either a type letter
** (Q q W w x r s I i) followed by the delimiter, or the delimiter itself,
** which is treated like type 'Q'. Installs this.lex_strterm and returns
** the matching *_BEG token; 'q' literals are scanned immediately through
** parse_string(). Returns 0 after reporting an error.
*/
MrbTokens quotation(int c)
{
    int term;
    int paren;

    if (c >= 0 && ISALNUM(c)) {
        /* explicit type letter: the delimiter is the next character */
        term = nextc();
        if (isalnum(term)) {
            yyError("unknown type of %string");
            return 0;
        }
    }
    else {
        /* no type letter: `c` already is the delimiter */
        term = c;
        c = 'Q';
    }
    if (c < 0 || term < 0) {
        yyError("unterminated quoted string meets end of file");
        return 0;
    }

    /* bracket delimiters close with their counterpart and may nest */
    paren = term;
    switch (term) {
    case '(':
        term = ')';
        break;
    case '[':
        term = ']';
        break;
    case '{':
        term = '}';
        break;
    case '<':
        term = '>';
        break;
    default:
        paren = 0;
        break;
    }

    mrb_string_type literalType;
    MrbTokens opener;
    switch (c) {
    case 'Q':
        literalType = mrb_string_type.str_dquote;
        opener = MrbTokens.tSTRING_BEG;
        break;
    case 'q':
        /* scanned right away instead of returning a *_BEG token */
        this.lex_strterm = new_strterm(mrb_string_type.str_squote, term, paren);
        return parse_string();
    case 'W':
        literalType = mrb_string_type.str_dword;
        opener = MrbTokens.tWORDS_BEG;
        break;
    case 'w':
        literalType = mrb_string_type.str_sword;
        opener = MrbTokens.tWORDS_BEG;
        break;
    case 'x':
        literalType = mrb_string_type.str_xquote;
        opener = MrbTokens.tXSTRING_BEG;
        break;
    case 'r':
        literalType = mrb_string_type.str_regexp;
        opener = MrbTokens.tREGEXP_BEG;
        break;
    case 's':
        literalType = mrb_string_type.str_ssym;
        opener = MrbTokens.tSYMBEG;
        break;
    case 'I':
        literalType = mrb_string_type.str_dsymbols;
        opener = MrbTokens.tSYMBOLS_BEG;
        break;
    case 'i':
        literalType = mrb_string_type.str_ssymbols;
        opener = MrbTokens.tSYMBOLS_BEG;
        break;
    default:
        yyError("unknown type of %string");
        return 0;
    }

    this.lex_strterm = new_strterm(literalType, term, paren);
    return opener;
}
| 2439 |
|
---|
/*
** Reserved-word table keyed by keyword text. Each entry carries two token
** ids (id0 / id1; they differ only for rescue, until, unless, if and
** while, where id1 is the modifier_* token) and the lexer state stored
** with the keyword.
*/
static readonly Dictionary<string, kwtable> wordlist = create_wordlist();

/* Builds the reserved-word table; entries are added in the listed order. */
static Dictionary<string, kwtable> create_wordlist()
{
    var table = new Dictionary<string, kwtable>();

    add_keyword(table, "break", MrbTokens.keyword_break, MrbTokens.keyword_break, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "else", MrbTokens.keyword_else, MrbTokens.keyword_else, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "nil", MrbTokens.keyword_nil, MrbTokens.keyword_nil, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "ensure", MrbTokens.keyword_ensure, MrbTokens.keyword_ensure, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "end", MrbTokens.keyword_end, MrbTokens.keyword_end, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "then", MrbTokens.keyword_then, MrbTokens.keyword_then, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "not", MrbTokens.keyword_not, MrbTokens.keyword_not, mrb_lex_state_enum.EXPR_ARG);
    add_keyword(table, "false", MrbTokens.keyword_false, MrbTokens.keyword_false, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "self", MrbTokens.keyword_self, MrbTokens.keyword_self, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "elsif", MrbTokens.keyword_elsif, MrbTokens.keyword_elsif, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "rescue", MrbTokens.keyword_rescue, MrbTokens.modifier_rescue, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "true", MrbTokens.keyword_true, MrbTokens.keyword_true, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "until", MrbTokens.keyword_until, MrbTokens.modifier_until, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "unless", MrbTokens.keyword_unless, MrbTokens.modifier_unless, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "return", MrbTokens.keyword_return, MrbTokens.keyword_return, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "def", MrbTokens.keyword_def, MrbTokens.keyword_def, mrb_lex_state_enum.EXPR_FNAME);
    add_keyword(table, "and", MrbTokens.keyword_and, MrbTokens.keyword_and, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "do", MrbTokens.keyword_do, MrbTokens.keyword_do, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "yield", MrbTokens.keyword_yield, MrbTokens.keyword_yield, mrb_lex_state_enum.EXPR_ARG);
    add_keyword(table, "for", MrbTokens.keyword_for, MrbTokens.keyword_for, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "undef", MrbTokens.keyword_undef, MrbTokens.keyword_undef, mrb_lex_state_enum.EXPR_FNAME);
    add_keyword(table, "or", MrbTokens.keyword_or, MrbTokens.keyword_or, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "in", MrbTokens.keyword_in, MrbTokens.keyword_in, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "when", MrbTokens.keyword_when, MrbTokens.keyword_when, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "retry", MrbTokens.keyword_retry, MrbTokens.keyword_retry, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "if", MrbTokens.keyword_if, MrbTokens.modifier_if, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "case", MrbTokens.keyword_case, MrbTokens.keyword_case, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "redo", MrbTokens.keyword_redo, MrbTokens.keyword_redo, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "next", MrbTokens.keyword_next, MrbTokens.keyword_next, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "super", MrbTokens.keyword_super, MrbTokens.keyword_super, mrb_lex_state_enum.EXPR_ARG);
    add_keyword(table, "module", MrbTokens.keyword_module, MrbTokens.keyword_module, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "begin", MrbTokens.keyword_begin, MrbTokens.keyword_begin, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "__LINE__", MrbTokens.keyword__LINE__, MrbTokens.keyword__LINE__, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "__FILE__", MrbTokens.keyword__FILE__, MrbTokens.keyword__FILE__, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "__ENCODING__", MrbTokens.keyword__ENCODING__, MrbTokens.keyword__ENCODING__, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "END", MrbTokens.keyword_END, MrbTokens.keyword_END, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "alias", MrbTokens.keyword_alias, MrbTokens.keyword_alias, mrb_lex_state_enum.EXPR_FNAME);
    add_keyword(table, "BEGIN", MrbTokens.keyword_BEGIN, MrbTokens.keyword_BEGIN, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "class", MrbTokens.keyword_class, MrbTokens.keyword_class, mrb_lex_state_enum.EXPR_CLASS);
    add_keyword(table, "while", MrbTokens.keyword_while, MrbTokens.modifier_while, mrb_lex_state_enum.EXPR_VALUE);

    return table;
}

/* Registers one keyword under its own spelling. */
static void add_keyword(Dictionary<string, kwtable> table, string name, MrbTokens id0, MrbTokens id1, mrb_lex_state_enum state)
{
    table.Add(name, new kwtable(name, id0, id1, state));
}
| 2483 |
|
---|
/*
** Looks up the first `len` bytes of `str` in the reserved-word table.
** Returns the matching entry, or null when the text is not a keyword.
*/
kwtable mrb_reserved_word(Uint8Array str, int len)
{
    kwtable entry;
    /* len + 1 bytes are sliced (presumably to include the terminating NUL) */
    string word = MrbParser.UTF8ArrayToString(str.SubArray(0, len + 1), 0);

    return wordlist.TryGetValue(word, out entry) ? entry : null;
}
| 2495 |
|
---|
| 2496 | MrbTokens parser_yylex()
|
---|
| 2497 | {
|
---|
| 2498 | int c;
|
---|
| 2499 | bool space_seen = false;
|
---|
| 2500 | bool cmd_state;
|
---|
| 2501 | mrb_lex_state_enum last_state;
|
---|
| 2502 | int token_column = 0;
|
---|
| 2503 |
|
---|
| 2504 | if (this.lex_strterm != null) {
|
---|
| 2505 | if (is_strterm_type(mrb_string_type.STR_FUNC_HEREDOC)) {
|
---|
| 2506 | if (this.parsing_heredoc != null)
|
---|
| 2507 | return parse_string();
|
---|
| 2508 | }
|
---|
| 2509 | else
|
---|
| 2510 | return parse_string();
|
---|
| 2511 | }
|
---|
| 2512 | cmd_state = this.cmd_start;
|
---|
| 2513 | this.cmd_start = false;
|
---|
| 2514 | for (;;) {
|
---|
| 2515 | last_state = this.lstate;
|
---|
| 2516 | switch (c = nextc()) {
|
---|
| 2517 | /* white spaces */
|
---|
| 2518 | case ' ':
|
---|
| 2519 | case '\t':
|
---|
| 2520 | case '\f':
|
---|
| 2521 | case '\r':
|
---|
| 2522 | case '\v':
|
---|
| 2523 | space_seen = true;
|
---|
| 2524 | continue;
|
---|
| 2525 |
|
---|
| 2526 | case '\x04': /* ^D */
|
---|
| 2527 | case '\x1a': /* ^Z */
|
---|
| 2528 | case '\0': /* NUL */
|
---|
| 2529 | case -1: /* end of script. */
|
---|
| 2530 | case '#': /* it's a comment */
|
---|
| 2531 | case -2: /* end of a file */
|
---|
| 2532 | case '\n':
|
---|
| 2533 | if (c == '#') {
|
---|
| 2534 | skip('\n');
|
---|
| 2535 | }
|
---|
| 2536 | else if ((c != -2) && (c != '\n')) {
|
---|
| 2537 | if (this.heredocs_from_nextline == null)
|
---|
| 2538 | return 0;
|
---|
| 2539 | }
|
---|
| 2540 | heredoc_treat_nextline();
|
---|
| 2541 | switch (this.lstate) {
|
---|
| 2542 | case mrb_lex_state_enum.EXPR_BEG:
|
---|
| 2543 | case mrb_lex_state_enum.EXPR_FNAME:
|
---|
| 2544 | case mrb_lex_state_enum.EXPR_DOT:
|
---|
| 2545 | case mrb_lex_state_enum.EXPR_CLASS:
|
---|
| 2546 | case mrb_lex_state_enum.EXPR_VALUE:
|
---|
| 2547 | this.lineno++;
|
---|
| 2548 | this.column = 0;
|
---|
| 2549 | if (this.parsing_heredoc != null) {
|
---|
| 2550 | if (this.lex_strterm != null) {
|
---|
| 2551 | return parse_string();
|
---|
| 2552 | }
|
---|
| 2553 | }
|
---|
| 2554 | continue;
|
---|
| 2555 | default:
|
---|
| 2556 | break;
|
---|
| 2557 | }
|
---|
| 2558 | if (this.parsing_heredoc != null) {
|
---|
| 2559 | return (MrbTokens)'\n';
|
---|
| 2560 | }
|
---|
| 2561 | bool retry = false;
|
---|
| 2562 | while ((c = nextc()) != 0) {
|
---|
| 2563 | switch (c) {
|
---|
| 2564 | case ' ':
|
---|
| 2565 | case '\t':
|
---|
| 2566 | case '\f':
|
---|
| 2567 | case '\r':
|
---|
| 2568 | case '\v':
|
---|
| 2569 | space_seen = true;
|
---|
| 2570 | continue;
|
---|
| 2571 | case '.':
|
---|
| 2572 | if ((c = nextc()) != '.') {
|
---|
| 2573 | pushback(c);
|
---|
| 2574 | pushback('.');
|
---|
| 2575 | retry = true;
|
---|
| 2576 | }
|
---|
| 2577 | break;
|
---|
| 2578 | case -1: /* EOF */
|
---|
| 2579 | case -2: /* end of a file */
|
---|
| 2580 | break;
|
---|
| 2581 | default:
|
---|
| 2582 | pushback(c);
|
---|
| 2583 | break;
|
---|
| 2584 | }
|
---|
| 2585 | break;
|
---|
| 2586 | }
|
---|
| 2587 | if (retry)
|
---|
| 2588 | continue;
|
---|
| 2589 | this.cmd_start = true;
|
---|
| 2590 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2591 | return (MrbTokens)'\n';
|
---|
| 2592 |
|
---|
| 2593 | case '*':
|
---|
| 2594 | if ((c = nextc()) == '*') {
|
---|
| 2595 | if ((c = nextc()) == '=') {
|
---|
| 2596 | yylval.id = intern("**", 2);
|
---|
| 2597 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2598 | return MrbTokens.tOP_ASGN;
|
---|
| 2599 | }
|
---|
| 2600 | pushback(c);
|
---|
| 2601 | c = (int)MrbTokens.tPOW;
|
---|
| 2602 | }
|
---|
| 2603 | else {
|
---|
| 2604 | if (c == '=') {
|
---|
| 2605 | yylval.id = intern_c('*');
|
---|
| 2606 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2607 | return MrbTokens.tOP_ASGN;
|
---|
| 2608 | }
|
---|
| 2609 | pushback(c);
|
---|
| 2610 | if (IS_SPCARG(c, space_seen)) {
|
---|
| 2611 | yyWarning("'*' interpreted as argument prefix");
|
---|
| 2612 | c = (int)MrbTokens.tSTAR;
|
---|
| 2613 | }
|
---|
| 2614 | else if (IS_BEG()) {
|
---|
| 2615 | c = (int)MrbTokens.tSTAR;
|
---|
| 2616 | }
|
---|
| 2617 | else {
|
---|
| 2618 | c = '*';
|
---|
| 2619 | }
|
---|
| 2620 | }
|
---|
| 2621 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2622 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2623 | }
|
---|
| 2624 | else {
|
---|
| 2625 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2626 | }
|
---|
| 2627 | return (MrbTokens)c;
|
---|
| 2628 |
|
---|
| 2629 | case '!':
|
---|
| 2630 | c = nextc();
|
---|
| 2631 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2632 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2633 | if (c == '@') {
|
---|
| 2634 | return (MrbTokens)'!';
|
---|
| 2635 | }
|
---|
| 2636 | }
|
---|
| 2637 | else {
|
---|
| 2638 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2639 | }
|
---|
| 2640 | if (c == '=') {
|
---|
| 2641 | return MrbTokens.tNEQ;
|
---|
| 2642 | }
|
---|
| 2643 | if (c == '~') {
|
---|
| 2644 | return MrbTokens.tNMATCH;
|
---|
| 2645 | }
|
---|
| 2646 | pushback(c);
|
---|
| 2647 | return (MrbTokens)'!';
|
---|
| 2648 |
|
---|
| 2649 | case '=':
|
---|
| 2650 | if (this.column == 1) {
|
---|
| 2651 | if (peeks(begin, 0)) {
|
---|
| 2652 | c = peekc_n(begin.Length - 1);
|
---|
| 2653 | if (c < 0 || ISSPACE(c)) {
|
---|
| 2654 | do {
|
---|
| 2655 | if (!skips(end, 0)) {
|
---|
| 2656 | yyError("embedded document meets end of file");
|
---|
| 2657 | return 0;
|
---|
| 2658 | }
|
---|
| 2659 | c = nextc();
|
---|
| 2660 | } while (!(c < 0 || ISSPACE(c)));
|
---|
| 2661 | if (c != '\n') skip('\n');
|
---|
| 2662 | this.lineno++;
|
---|
| 2663 | this.column = 0;
|
---|
| 2664 | continue;
|
---|
| 2665 | }
|
---|
| 2666 | }
|
---|
| 2667 | }
|
---|
| 2668 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2669 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2670 | }
|
---|
| 2671 | else {
|
---|
| 2672 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2673 | }
|
---|
| 2674 | if ((c = nextc()) == '=') {
|
---|
| 2675 | if ((c = nextc()) == '=') {
|
---|
| 2676 | return MrbTokens.tEQQ;
|
---|
| 2677 | }
|
---|
| 2678 | pushback(c);
|
---|
| 2679 | return MrbTokens.tEQ;
|
---|
| 2680 | }
|
---|
| 2681 | if (c == '~') {
|
---|
| 2682 | return MrbTokens.tMATCH;
|
---|
| 2683 | }
|
---|
| 2684 | else if (c == '>') {
|
---|
| 2685 | return MrbTokens.tASSOC;
|
---|
| 2686 | }
|
---|
| 2687 | pushback(c);
|
---|
| 2688 | return (MrbTokens)'=';
|
---|
| 2689 |
|
---|
| 2690 | case '<':
|
---|
| 2691 | c = nextc();
|
---|
| 2692 | if (c == '<' &&
|
---|
| 2693 | this.lstate != mrb_lex_state_enum.EXPR_DOT &&
|
---|
| 2694 | this.lstate != mrb_lex_state_enum.EXPR_CLASS &&
|
---|
| 2695 | !IS_END() &&
|
---|
| 2696 | (!IS_ARG() || space_seen)) {
|
---|
| 2697 | MrbTokens token = heredoc_identifier();
|
---|
| 2698 | if (token != 0)
|
---|
| 2699 | return token;
|
---|
| 2700 | }
|
---|
| 2701 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2702 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2703 | }
|
---|
| 2704 | else {
|
---|
| 2705 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2706 | if (this.lstate == mrb_lex_state_enum.EXPR_CLASS) {
|
---|
| 2707 | this.cmd_start = true;
|
---|
| 2708 | }
|
---|
| 2709 | }
|
---|
| 2710 | if (c == '=') {
|
---|
| 2711 | if ((c = nextc()) == '>') {
|
---|
| 2712 | return MrbTokens.tCMP;
|
---|
| 2713 | }
|
---|
| 2714 | pushback(c);
|
---|
| 2715 | return MrbTokens.tLEQ;
|
---|
| 2716 | }
|
---|
| 2717 | if (c == '<') {
|
---|
| 2718 | if ((c = nextc()) == '=') {
|
---|
| 2719 | yylval.id = intern("<<", 2);
|
---|
| 2720 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2721 | return MrbTokens.tOP_ASGN;
|
---|
| 2722 | }
|
---|
| 2723 | pushback(c);
|
---|
| 2724 | return MrbTokens.tLSHFT;
|
---|
| 2725 | }
|
---|
| 2726 | pushback(c);
|
---|
| 2727 | return (MrbTokens)'<';
|
---|
| 2728 |
|
---|
| 2729 | case '>':
|
---|
| 2730 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2731 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2732 | }
|
---|
| 2733 | else {
|
---|
| 2734 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2735 | }
|
---|
| 2736 | if ((c = nextc()) == '=') {
|
---|
| 2737 | return MrbTokens.tGEQ;
|
---|
| 2738 | }
|
---|
| 2739 | if (c == '>') {
|
---|
| 2740 | if ((c = nextc()) == '=') {
|
---|
| 2741 | yylval.id = intern(">>", 2);
|
---|
| 2742 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2743 | return MrbTokens.tOP_ASGN;
|
---|
| 2744 | }
|
---|
| 2745 | pushback(c);
|
---|
| 2746 | return MrbTokens.tRSHFT;
|
---|
| 2747 | }
|
---|
| 2748 | pushback(c);
|
---|
| 2749 | return (MrbTokens)'>';
|
---|
| 2750 |
|
---|
| 2751 | case '"':
|
---|
| 2752 | this.lex_strterm = new_strterm(mrb_string_type.str_dquote, '"', 0);
|
---|
| 2753 | return MrbTokens.tSTRING_BEG;
|
---|
| 2754 |
|
---|
| 2755 | case '\'':
|
---|
| 2756 | this.lex_strterm = new_strterm(mrb_string_type.str_squote, '\'', 0);
|
---|
| 2757 | return parse_string();
|
---|
| 2758 |
|
---|
| 2759 | case '`':
|
---|
| 2760 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME) {
|
---|
| 2761 | this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
|
---|
| 2762 | return (MrbTokens)'`';
|
---|
| 2763 | }
|
---|
| 2764 | if (this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2765 | if (cmd_state)
|
---|
| 2766 | this.lstate = mrb_lex_state_enum.EXPR_CMDARG;
|
---|
| 2767 | else
|
---|
| 2768 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2769 | return (MrbTokens)'`';
|
---|
| 2770 | }
|
---|
| 2771 | this.lex_strterm = new_strterm(mrb_string_type.str_xquote, '`', 0);
|
---|
| 2772 | return MrbTokens.tXSTRING_BEG;
|
---|
| 2773 |
|
---|
| 2774 | case '?':
|
---|
| 2775 | if (IS_END()) {
|
---|
| 2776 | this.lstate = mrb_lex_state_enum.EXPR_VALUE;
|
---|
| 2777 | return (MrbTokens)'?';
|
---|
| 2778 | }
|
---|
| 2779 | c = nextc();
|
---|
| 2780 | if (c < 0) {
|
---|
| 2781 | yyError("incomplete character syntax");
|
---|
| 2782 | return 0;
|
---|
| 2783 | }
|
---|
| 2784 | if (ISSPACE(c)) {
|
---|
| 2785 | if (!IS_ARG()) {
|
---|
| 2786 | int c2;
|
---|
| 2787 | switch (c) {
|
---|
| 2788 | case ' ':
|
---|
| 2789 | c2 = 's';
|
---|
| 2790 | break;
|
---|
| 2791 | case '\n':
|
---|
| 2792 | c2 = 'n';
|
---|
| 2793 | break;
|
---|
| 2794 | case '\t':
|
---|
| 2795 | c2 = 't';
|
---|
| 2796 | break;
|
---|
| 2797 | case '\v':
|
---|
| 2798 | c2 = 'v';
|
---|
| 2799 | break;
|
---|
| 2800 | case '\r':
|
---|
| 2801 | c2 = 'r';
|
---|
| 2802 | break;
|
---|
| 2803 | case '\f':
|
---|
| 2804 | c2 = 'f';
|
---|
| 2805 | break;
|
---|
| 2806 | default:
|
---|
| 2807 | c2 = 0;
|
---|
| 2808 | break;
|
---|
| 2809 | }
|
---|
| 2810 | if (c2 != 0) {
|
---|
| 2811 | yyError(String.Format("invalid character syntax; use ?\\{0}", c2));
|
---|
| 2812 | }
|
---|
| 2813 | }
|
---|
| 2814 |
|
---|
| 2815 | pushback(c);
|
---|
| 2816 | this.lstate = mrb_lex_state_enum.EXPR_VALUE;
|
---|
| 2817 | return (MrbTokens)'?';
|
---|
| 2818 | }
|
---|
| 2819 | newtok();
|
---|
| 2820 | /* need support UTF-8 if configured */
|
---|
| 2821 | if ((isalnum(c) || c == '_')) {
|
---|
| 2822 | int c2 = nextc();
|
---|
| 2823 | pushback(c2);
|
---|
| 2824 | if ((isalnum(c2) || c2 == '_')) {
|
---|
| 2825 | pushback(c);
|
---|
| 2826 | this.lstate = mrb_lex_state_enum.EXPR_VALUE;
|
---|
| 2827 | return (MrbTokens)'?';
|
---|
| 2828 | }
|
---|
| 2829 | }
|
---|
| 2830 | if (c == '\\') {
|
---|
| 2831 | c = read_escape();
|
---|
| 2832 | tokadd(c);
|
---|
| 2833 | }
|
---|
| 2834 | else {
|
---|
| 2835 | tokadd(c);
|
---|
| 2836 | }
|
---|
| 2837 | tokfix();
|
---|
| 2838 | yylval.nd = new_str(tok(), toklen());
|
---|
| 2839 | this.lstate = mrb_lex_state_enum.EXPR_END;
|
---|
| 2840 | return MrbTokens.tCHAR;
|
---|
| 2841 |
|
---|
| 2842 | case '&':
|
---|
| 2843 | if ((c = nextc()) == '&') {
|
---|
| 2844 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2845 | if ((c = nextc()) == '=') {
|
---|
| 2846 | yylval.id = intern("&&", 2);
|
---|
| 2847 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2848 | return MrbTokens.tOP_ASGN;
|
---|
| 2849 | }
|
---|
| 2850 | pushback(c);
|
---|
| 2851 | return MrbTokens.tANDOP;
|
---|
| 2852 | }
|
---|
| 2853 | else if (c == '.') {
|
---|
| 2854 | this.lstate = mrb_lex_state_enum.EXPR_DOT;
|
---|
| 2855 | return MrbTokens.tANDDOT;
|
---|
| 2856 | }
|
---|
| 2857 | else if (c == '=') {
|
---|
| 2858 | yylval.id = intern_c('&');
|
---|
| 2859 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2860 | return MrbTokens.tOP_ASGN;
|
---|
| 2861 | }
|
---|
| 2862 | pushback(c);
|
---|
| 2863 | if (IS_SPCARG(c, space_seen)) {
|
---|
| 2864 | yyWarning("'&' interpreted as argument prefix");
|
---|
| 2865 | c = (int)MrbTokens.tAMPER;
|
---|
| 2866 | }
|
---|
| 2867 | else if (IS_BEG()) {
|
---|
| 2868 | c = (int)MrbTokens.tAMPER;
|
---|
| 2869 | }
|
---|
| 2870 | else {
|
---|
| 2871 | c = '&';
|
---|
| 2872 | }
|
---|
| 2873 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2874 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2875 | }
|
---|
| 2876 | else {
|
---|
| 2877 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2878 | }
|
---|
| 2879 | return (MrbTokens)c;
|
---|
| 2880 |
|
---|
| 2881 | case '|':
|
---|
| 2882 | if ((c = nextc()) == '|') {
|
---|
| 2883 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2884 | if ((c = nextc()) == '=') {
|
---|
| 2885 | yylval.id = intern("||", 2);
|
---|
| 2886 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2887 | return MrbTokens.tOP_ASGN;
|
---|
| 2888 | }
|
---|
| 2889 | pushback(c);
|
---|
| 2890 | return MrbTokens.tOROP;
|
---|
| 2891 | }
|
---|
| 2892 | if (c == '=') {
|
---|
| 2893 | yylval.id = intern_c('|');
|
---|
| 2894 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2895 | return MrbTokens.tOP_ASGN;
|
---|
| 2896 | }
|
---|
| 2897 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2898 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2899 | }
|
---|
| 2900 | else {
|
---|
| 2901 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2902 | }
|
---|
| 2903 | pushback(c);
|
---|
| 2904 | return (MrbTokens)'|';
|
---|
| 2905 |
|
---|
| 2906 | case '+':
|
---|
| 2907 | c = nextc();
|
---|
| 2908 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2909 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2910 | if (c == '@') {
|
---|
| 2911 | return MrbTokens.tUPLUS;
|
---|
| 2912 | }
|
---|
| 2913 | pushback(c);
|
---|
| 2914 | return (MrbTokens)'+';
|
---|
| 2915 | }
|
---|
| 2916 | if (c == '=') {
|
---|
| 2917 | yylval.id = intern_c('+');
|
---|
| 2918 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2919 | return MrbTokens.tOP_ASGN;
|
---|
| 2920 | }
|
---|
| 2921 | if (IS_BEG() || (IS_SPCARG(c, space_seen) && arg_ambiguous())) {
|
---|
| 2922 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2923 | pushback(c);
|
---|
| 2924 | if (c >= 0 && ISDIGIT(c)) {
|
---|
| 2925 | c = '+';
|
---|
| 2926 | return start_num(c);
|
---|
| 2927 | }
|
---|
| 2928 | return MrbTokens.tUPLUS;
|
---|
| 2929 | }
|
---|
| 2930 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2931 | pushback(c);
|
---|
| 2932 | return (MrbTokens)'+';
|
---|
| 2933 |
|
---|
| 2934 | case '-':
|
---|
| 2935 | c = nextc();
|
---|
| 2936 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 2937 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 2938 | if (c == '@') {
|
---|
| 2939 | return MrbTokens.tUMINUS;
|
---|
| 2940 | }
|
---|
| 2941 | pushback(c);
|
---|
| 2942 | return (MrbTokens)'-';
|
---|
| 2943 | }
|
---|
| 2944 | if (c == '=') {
|
---|
| 2945 | yylval.id = intern_c('-');
|
---|
| 2946 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2947 | return MrbTokens.tOP_ASGN;
|
---|
| 2948 | }
|
---|
| 2949 | if (c == '>') {
|
---|
| 2950 | this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
|
---|
| 2951 | return MrbTokens.tLAMBDA;
|
---|
| 2952 | }
|
---|
| 2953 | if (IS_BEG() || (IS_SPCARG(c, space_seen) && arg_ambiguous())) {
|
---|
| 2954 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2955 | pushback(c);
|
---|
| 2956 | if (c >= 0 && ISDIGIT(c)) {
|
---|
| 2957 | return MrbTokens.tUMINUS_NUM;
|
---|
| 2958 | }
|
---|
| 2959 | return MrbTokens.tUMINUS;
|
---|
| 2960 | }
|
---|
| 2961 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2962 | pushback(c);
|
---|
| 2963 | return (MrbTokens)'-';
|
---|
| 2964 |
|
---|
| 2965 | case '.':
|
---|
| 2966 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 2967 | if ((c = nextc()) == '.') {
|
---|
| 2968 | if ((c = nextc()) == '.') {
|
---|
| 2969 | return MrbTokens.tDOT3;
|
---|
| 2970 | }
|
---|
| 2971 | pushback(c);
|
---|
| 2972 | return MrbTokens.tDOT2;
|
---|
| 2973 | }
|
---|
| 2974 | pushback(c);
|
---|
| 2975 | if (c >= 0 && ISDIGIT(c)) {
|
---|
| 2976 | yyError("no .<digit> floating literal anymore; put 0 before dot");
|
---|
| 2977 | }
|
---|
| 2978 | this.lstate = mrb_lex_state_enum.EXPR_DOT;
|
---|
| 2979 | return (MrbTokens)'.';
|
---|
| 2980 |
|
---|
| 2981 | case '0':
|
---|
| 2982 | case '1':
|
---|
| 2983 | case '2':
|
---|
| 2984 | case '3':
|
---|
| 2985 | case '4':
|
---|
| 2986 | case '5':
|
---|
| 2987 | case '6':
|
---|
| 2988 | case '7':
|
---|
| 2989 | case '8':
|
---|
| 2990 | case '9':
|
---|
| 2991 | return start_num(c);
|
---|
| 2992 |
|
---|
| 2993 | case ')':
|
---|
| 2994 | case ']':
|
---|
| 2995 | case '}':
|
---|
| 2996 | if (c != '}') {
|
---|
| 2997 | this.paren_nest--;
|
---|
| 2998 | }
|
---|
| 2999 | COND_LEXPOP();
|
---|
| 3000 | CMDARG_LEXPOP();
|
---|
| 3001 | if (c == ')')
|
---|
| 3002 | this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
|
---|
| 3003 | else
|
---|
| 3004 | this.lstate = mrb_lex_state_enum.EXPR_ENDARG;
|
---|
| 3005 | return (MrbTokens)c;
|
---|
| 3006 |
|
---|
| 3007 | case ':':
|
---|
| 3008 | c = nextc();
|
---|
| 3009 | if (c == ':') {
|
---|
| 3010 | if (IS_BEG() || this.lstate == mrb_lex_state_enum.EXPR_CLASS || IS_SPCARG(-1, space_seen)) {
|
---|
| 3011 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3012 | return MrbTokens.tCOLON3;
|
---|
| 3013 | }
|
---|
| 3014 | this.lstate = mrb_lex_state_enum.EXPR_DOT;
|
---|
| 3015 | return MrbTokens.tCOLON2;
|
---|
| 3016 | }
|
---|
| 3017 | if (IS_END() || ISSPACE(c)) {
|
---|
| 3018 | pushback(c);
|
---|
| 3019 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3020 | return (MrbTokens)':';
|
---|
| 3021 | }
|
---|
| 3022 | pushback(c);
|
---|
| 3023 | this.lstate = mrb_lex_state_enum.EXPR_FNAME;
|
---|
| 3024 | return MrbTokens.tSYMBEG;
|
---|
| 3025 |
|
---|
| 3026 | case '/':
|
---|
| 3027 | if (IS_BEG()) {
|
---|
| 3028 | this.lex_strterm = new_strterm(mrb_string_type.str_regexp, '/', 0);
|
---|
| 3029 | return MrbTokens.tREGEXP_BEG;
|
---|
| 3030 | }
|
---|
| 3031 | if ((c = nextc()) == '=') {
|
---|
| 3032 | yylval.id = intern_c('/');
|
---|
| 3033 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3034 | return MrbTokens.tOP_ASGN;
|
---|
| 3035 | }
|
---|
| 3036 | pushback(c);
|
---|
| 3037 | if (IS_SPCARG(c, space_seen)) {
|
---|
| 3038 | this.lex_strterm = new_strterm(mrb_string_type.str_regexp, '/', 0);
|
---|
| 3039 | return MrbTokens.tREGEXP_BEG;
|
---|
| 3040 | }
|
---|
| 3041 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 3042 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 3043 | }
|
---|
| 3044 | else {
|
---|
| 3045 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3046 | }
|
---|
| 3047 | return (MrbTokens)'/';
|
---|
| 3048 |
|
---|
| 3049 | case '^':
|
---|
| 3050 | if ((c = nextc()) == '=') {
|
---|
| 3051 | yylval.id = intern_c('^');
|
---|
| 3052 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3053 | return MrbTokens.tOP_ASGN;
|
---|
| 3054 | }
|
---|
| 3055 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 3056 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 3057 | }
|
---|
| 3058 | else {
|
---|
| 3059 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3060 | }
|
---|
| 3061 | pushback(c);
|
---|
| 3062 | return (MrbTokens)'^';
|
---|
| 3063 |
|
---|
| 3064 | case ';':
|
---|
| 3065 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3066 | return (MrbTokens)';';
|
---|
| 3067 |
|
---|
| 3068 | case ',':
|
---|
| 3069 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3070 | return (MrbTokens)',';
|
---|
| 3071 |
|
---|
| 3072 | case '~':
|
---|
| 3073 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 3074 | if ((c = nextc()) != '@') {
|
---|
| 3075 | pushback(c);
|
---|
| 3076 | }
|
---|
| 3077 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 3078 | }
|
---|
| 3079 | else {
|
---|
| 3080 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3081 | }
|
---|
| 3082 | return (MrbTokens)'~';
|
---|
| 3083 |
|
---|
| 3084 | case '(':
|
---|
| 3085 | if (IS_BEG()) {
|
---|
| 3086 | c = (int)MrbTokens.tLPAREN;
|
---|
| 3087 | }
|
---|
| 3088 | else if (IS_SPCARG(-1, space_seen)) {
|
---|
| 3089 | c = (int)MrbTokens.tLPAREN_ARG;
|
---|
| 3090 | }
|
---|
| 3091 | this.paren_nest++;
|
---|
| 3092 | COND_PUSH(0);
|
---|
| 3093 | CMDARG_PUSH(0);
|
---|
| 3094 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3095 | return (MrbTokens)c;
|
---|
| 3096 |
|
---|
| 3097 | case '[':
|
---|
| 3098 | this.paren_nest++;
|
---|
| 3099 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 3100 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 3101 | if ((c = nextc()) == ']') {
|
---|
| 3102 | if ((c = nextc()) == '=') {
|
---|
| 3103 | return MrbTokens.tASET;
|
---|
| 3104 | }
|
---|
| 3105 | pushback(c);
|
---|
| 3106 | return MrbTokens.tAREF;
|
---|
| 3107 | }
|
---|
| 3108 | pushback(c);
|
---|
| 3109 | return (MrbTokens)'[';
|
---|
| 3110 | }
|
---|
| 3111 | else if (IS_BEG()) {
|
---|
| 3112 | c = (int)MrbTokens.tLBRACK;
|
---|
| 3113 | }
|
---|
| 3114 | else if (IS_ARG() && space_seen) {
|
---|
| 3115 | c = (int)MrbTokens.tLBRACK;
|
---|
| 3116 | }
|
---|
| 3117 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3118 | COND_PUSH(0);
|
---|
| 3119 | CMDARG_PUSH(0);
|
---|
| 3120 | return (MrbTokens)c;
|
---|
| 3121 |
|
---|
| 3122 | case '{':
|
---|
| 3123 | if (this.lpar_beg != 0 && this.lpar_beg == this.paren_nest) {
|
---|
| 3124 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3125 | this.lpar_beg = 0;
|
---|
| 3126 | this.paren_nest--;
|
---|
| 3127 | COND_PUSH(0);
|
---|
| 3128 | CMDARG_PUSH(0);
|
---|
| 3129 | return MrbTokens.tLAMBEG;
|
---|
| 3130 | }
|
---|
| 3131 | if (IS_ARG() || this.lstate == mrb_lex_state_enum.EXPR_END || this.lstate == mrb_lex_state_enum.EXPR_ENDFN)
|
---|
| 3132 | c = '{'; /* block (primary) */
|
---|
| 3133 | else if (this.lstate == mrb_lex_state_enum.EXPR_ENDARG)
|
---|
| 3134 | c = (int)MrbTokens.tLBRACE_ARG; /* block (expr) */
|
---|
| 3135 | else
|
---|
| 3136 | c = (int)MrbTokens.tLBRACE; /* hash */
|
---|
| 3137 | COND_PUSH(0);
|
---|
| 3138 | CMDARG_PUSH(0);
|
---|
| 3139 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3140 | return (MrbTokens)c;
|
---|
| 3141 |
|
---|
| 3142 | case '\\':
|
---|
| 3143 | c = nextc();
|
---|
| 3144 | if (c == '\n') {
|
---|
| 3145 | this.lineno++;
|
---|
| 3146 | this.column = 0;
|
---|
| 3147 | space_seen = true;
|
---|
| 3148 | continue; /* skip \\n */
|
---|
| 3149 | }
|
---|
| 3150 | pushback(c);
|
---|
| 3151 | return (MrbTokens)'\\';
|
---|
| 3152 |
|
---|
| 3153 | case '%':
|
---|
| 3154 | if (IS_BEG()) {
|
---|
| 3155 | c = nextc();
|
---|
| 3156 | return quotation(c);
|
---|
| 3157 | }
|
---|
| 3158 | for (;;) {
|
---|
| 3159 | if ((c = nextc()) == '=') {
|
---|
| 3160 | yylval.id = intern_c('%');
|
---|
| 3161 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3162 | return MrbTokens.tOP_ASGN;
|
---|
| 3163 | }
|
---|
| 3164 | if (IS_SPCARG(c, space_seen)) {
|
---|
| 3165 | return quotation(c);
|
---|
| 3166 | }
|
---|
| 3167 | break;
|
---|
| 3168 | }
|
---|
| 3169 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 3170 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 3171 | }
|
---|
| 3172 | else {
|
---|
| 3173 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3174 | }
|
---|
| 3175 | pushback(c);
|
---|
| 3176 | return (MrbTokens)'%';
|
---|
| 3177 |
|
---|
| 3178 | case '$':
|
---|
| 3179 | this.lstate = mrb_lex_state_enum.EXPR_END;
|
---|
| 3180 | token_column = newtok();
|
---|
| 3181 | c = nextc();
|
---|
| 3182 | if (c < 0) {
|
---|
| 3183 | yyError("incomplete global variable syntax");
|
---|
| 3184 | return 0;
|
---|
| 3185 | }
|
---|
| 3186 | switch (c) {
|
---|
| 3187 | case '_': /* $_: last read line string */
|
---|
| 3188 | case '~': /* $~: match-data */
|
---|
| 3189 | case '*': /* $*: argv */
|
---|
| 3190 | case '$': /* $$: pid */
|
---|
| 3191 | case '?': /* $?: last status */
|
---|
| 3192 | case '!': /* $!: error string */
|
---|
| 3193 | case '@': /* $@: error position */
|
---|
| 3194 | case '/': /* $/: input record separator */
|
---|
| 3195 | case '\\': /* $\: output record separator */
|
---|
| 3196 | case ';': /* $;: field separator */
|
---|
| 3197 | case ',': /* $,: output field separator */
|
---|
| 3198 | case '.': /* $.: last read line number */
|
---|
| 3199 | case '=': /* $=: ignorecase */
|
---|
| 3200 | case ':': /* $:: load path */
|
---|
| 3201 | case '<': /* $<: reading filename */
|
---|
| 3202 | case '>': /* $>: default output handle */
|
---|
| 3203 | case '\"': /* $": already loaded files */
|
---|
| 3204 | if (c == '_') {
|
---|
| 3205 | c = nextc();
|
---|
| 3206 | if (c >= 0 && identchar(c)) { /* if there is more after _ it is a variable */
|
---|
| 3207 | tokadd('$');
|
---|
| 3208 | tokadd(c);
|
---|
| 3209 | break;
|
---|
| 3210 | }
|
---|
| 3211 | pushback(c);
|
---|
| 3212 | c = '_';
|
---|
| 3213 | }
|
---|
| 3214 | tokadd('$');
|
---|
| 3215 | tokadd(c);
|
---|
| 3216 | tokfix();
|
---|
| 3217 | yylval.id = intern_cstr(tok());
|
---|
| 3218 | return MrbTokens.tGVAR;
|
---|
| 3219 |
|
---|
| 3220 | case '-':
|
---|
| 3221 | tokadd('$');
|
---|
| 3222 | tokadd(c);
|
---|
| 3223 | c = nextc();
|
---|
| 3224 | pushback(c);
|
---|
| 3225 | tokfix();
|
---|
| 3226 | yylval.id = intern_cstr(tok());
|
---|
| 3227 | return MrbTokens.tGVAR;
|
---|
| 3228 |
|
---|
| 3229 | case '&': /* $&: last match */
|
---|
| 3230 | case '`': /* $`: string before last match */
|
---|
| 3231 | case '\'': /* $': string after last match */
|
---|
| 3232 | case '+': /* $+: string matches last pattern */
|
---|
| 3233 | if (last_state == mrb_lex_state_enum.EXPR_FNAME) {
|
---|
| 3234 | tokadd('$');
|
---|
| 3235 | tokadd(c);
|
---|
| 3236 | tokfix();
|
---|
| 3237 | yylval.id = intern_cstr(tok());
|
---|
| 3238 | return MrbTokens.tGVAR;
|
---|
| 3239 | }
|
---|
| 3240 | yylval.nd = new_back_ref(c);
|
---|
| 3241 | return MrbTokens.tBACK_REF;
|
---|
| 3242 |
|
---|
| 3243 | case '1':
|
---|
| 3244 | case '2':
|
---|
| 3245 | case '3':
|
---|
| 3246 | case '4':
|
---|
| 3247 | case '5':
|
---|
| 3248 | case '6':
|
---|
| 3249 | case '7':
|
---|
| 3250 | case '8':
|
---|
| 3251 | case '9':
|
---|
| 3252 | do {
|
---|
| 3253 | tokadd(c);
|
---|
| 3254 | c = nextc();
|
---|
| 3255 | } while (c >= 0 && isdigit(c));
|
---|
| 3256 | pushback(c);
|
---|
| 3257 | if (last_state == mrb_lex_state_enum.EXPR_FNAME) {
|
---|
| 3258 | tokfix();
|
---|
| 3259 | yylval.id = intern_cstr(tok());
|
---|
| 3260 | return MrbTokens.tGVAR;
|
---|
| 3261 | }
|
---|
| 3262 | tokfix(); {
|
---|
| 3263 | Uint8Array t;
|
---|
| 3264 | ulong n = strtoul(tok(), 0, out t, 10);
|
---|
| 3265 | if (n > int.MaxValue) {
|
---|
| 3266 | yyError("capture group index must be <= {0}", int.MaxValue.ToString());
|
---|
| 3267 | return 0;
|
---|
| 3268 | }
|
---|
| 3269 | yylval.nd = new_nth_ref((int)n);
|
---|
| 3270 | }
|
---|
| 3271 | return MrbTokens.tNTH_REF;
|
---|
| 3272 |
|
---|
| 3273 | default:
|
---|
| 3274 | if (!identchar(c)) {
|
---|
| 3275 | pushback(c);
|
---|
| 3276 | return (MrbTokens)'$';
|
---|
| 3277 | }
|
---|
| 3278 | tokadd('$');
|
---|
| 3279 | break;
|
---|
| 3280 |
|
---|
| 3281 | case '0':
|
---|
| 3282 | tokadd('$');
|
---|
| 3283 | break;
|
---|
| 3284 | }
|
---|
| 3285 | break;
|
---|
| 3286 |
|
---|
| 3287 | case '@':
|
---|
| 3288 | c = nextc();
|
---|
| 3289 | token_column = newtok();
|
---|
| 3290 | tokadd('@');
|
---|
| 3291 | if (c == '@') {
|
---|
| 3292 | tokadd('@');
|
---|
| 3293 | c = nextc();
|
---|
| 3294 | }
|
---|
| 3295 | if (c < 0) {
|
---|
| 3296 | if (this.tidx == 1) {
|
---|
| 3297 | yyError("incomplete instance variable syntax");
|
---|
| 3298 | }
|
---|
| 3299 | else {
|
---|
| 3300 | yyError("incomplete class variable syntax");
|
---|
| 3301 | }
|
---|
| 3302 | return 0;
|
---|
| 3303 | }
|
---|
| 3304 | else if (isdigit(c)) {
|
---|
| 3305 | if (this.tidx == 1) {
|
---|
| 3306 | yyError("'@{0}' is not allowed as an instance variable name", ((char)c).ToString());
|
---|
| 3307 | }
|
---|
| 3308 | else {
|
---|
| 3309 | yyError("'@@{0}' is not allowed as a class variable name", ((char)c).ToString());
|
---|
| 3310 | }
|
---|
| 3311 | return 0;
|
---|
| 3312 | }
|
---|
| 3313 | if (!identchar(c)) {
|
---|
| 3314 | pushback(c);
|
---|
| 3315 | return (MrbTokens)'@';
|
---|
| 3316 | }
|
---|
| 3317 | break;
|
---|
| 3318 |
|
---|
| 3319 | case '_':
|
---|
| 3320 | token_column = newtok();
|
---|
| 3321 | break;
|
---|
| 3322 |
|
---|
| 3323 | default:
|
---|
| 3324 | if (!identchar(c)) {
|
---|
| 3325 | yyError("Invalid char '\\x{0}' in expression", c.ToString("X2"));
|
---|
| 3326 | continue;
|
---|
| 3327 | }
|
---|
| 3328 |
|
---|
| 3329 | token_column = newtok();
|
---|
| 3330 | break;
|
---|
| 3331 | }
|
---|
| 3332 | break;
|
---|
| 3333 | }
|
---|
| 3334 |
|
---|
| 3335 | do {
|
---|
| 3336 | tokadd(c);
|
---|
| 3337 | c = nextc();
|
---|
| 3338 | if (c < 0) break;
|
---|
| 3339 | } while (identchar(c));
|
---|
| 3340 | if (token_column == 0 && toklen() == 7 && (c < 0 || c == '\n') &&
|
---|
| 3341 | strncmp(tok(), 0, MrbParser.UTF8StringToArray("__END__"), 0, toklen()) == 0)
|
---|
| 3342 | return (MrbTokens)(-1);
|
---|
| 3343 |
|
---|
| 3344 | switch ((char)tok()[0]) {
|
---|
| 3345 | case '@':
|
---|
| 3346 | case '$':
|
---|
| 3347 | pushback(c);
|
---|
| 3348 | break;
|
---|
| 3349 | default:
|
---|
| 3350 | if ((c == '!' || c == '?') && !peek('=')) {
|
---|
| 3351 | tokadd(c);
|
---|
| 3352 | }
|
---|
| 3353 | else {
|
---|
| 3354 | pushback(c);
|
---|
| 3355 | }
|
---|
| 3356 | break;
|
---|
| 3357 | }
|
---|
| 3358 | tokfix();
|
---|
| 3359 | {
|
---|
| 3360 | MrbTokens result = 0;
|
---|
| 3361 |
|
---|
| 3362 | switch ((char)tok()[0]) {
|
---|
| 3363 | case '$':
|
---|
| 3364 | this.lstate = mrb_lex_state_enum.EXPR_END;
|
---|
| 3365 | result = MrbTokens.tGVAR;
|
---|
| 3366 | break;
|
---|
| 3367 | case '@':
|
---|
| 3368 | this.lstate = mrb_lex_state_enum.EXPR_END;
|
---|
| 3369 | if (tok()[1] == '@')
|
---|
| 3370 | result = MrbTokens.tCVAR;
|
---|
| 3371 | else
|
---|
| 3372 | result = MrbTokens.tIVAR;
|
---|
| 3373 | break;
|
---|
| 3374 |
|
---|
| 3375 | default:
|
---|
| 3376 | if (toklast() == '!' || toklast() == '?') {
|
---|
| 3377 | result = MrbTokens.tFID;
|
---|
| 3378 | }
|
---|
| 3379 | else {
|
---|
| 3380 | if (this.lstate == mrb_lex_state_enum.EXPR_FNAME) {
|
---|
| 3381 | if ((c = nextc()) == '=' && !peek('~') && !peek('>') &&
|
---|
| 3382 | (!peek('=') || (peek_n('>', 1)))) {
|
---|
| 3383 | result = MrbTokens.tIDENTIFIER;
|
---|
| 3384 | tokadd(c);
|
---|
| 3385 | tokfix();
|
---|
| 3386 | }
|
---|
| 3387 | else {
|
---|
| 3388 | pushback(c);
|
---|
| 3389 | }
|
---|
| 3390 | }
|
---|
| 3391 | if (result == 0 && ISUPPER(tok()[0])) {
|
---|
| 3392 | result = MrbTokens.tCONSTANT;
|
---|
| 3393 | }
|
---|
| 3394 | else {
|
---|
| 3395 | result = MrbTokens.tIDENTIFIER;
|
---|
| 3396 | }
|
---|
| 3397 | }
|
---|
| 3398 |
|
---|
| 3399 | if (IS_LABEL_POSSIBLE(cmd_state)) {
|
---|
| 3400 | if (IS_LABEL_SUFFIX(0)) {
|
---|
| 3401 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3402 | nextc();
|
---|
| 3403 | tokfix();
|
---|
| 3404 | yylval.id = intern_cstr(tok());
|
---|
| 3405 | return MrbTokens.tLABEL;
|
---|
| 3406 | }
|
---|
| 3407 | }
|
---|
| 3408 | if (this.lstate != mrb_lex_state_enum.EXPR_DOT) {
|
---|
| 3409 | kwtable kw;
|
---|
| 3410 | /* See if it is a reserved word. */
|
---|
| 3411 | kw = mrb_reserved_word(tok(), toklen());
|
---|
| 3412 | if (kw != null) {
|
---|
| 3413 | mrb_lex_state_enum state = this.lstate;
|
---|
| 3414 | yylval.num = this.lineno;
|
---|
| 3415 | this.lstate = kw.state;
|
---|
| 3416 | if (state == mrb_lex_state_enum.EXPR_FNAME) {
|
---|
| 3417 | yylval.id = intern_cstr(kw.name);
|
---|
| 3418 | return kw.id0;
|
---|
| 3419 | }
|
---|
| 3420 | if (this.lstate == mrb_lex_state_enum.EXPR_BEG) {
|
---|
| 3421 | this.cmd_start = true;
|
---|
| 3422 | }
|
---|
| 3423 | if (kw.id0 == MrbTokens.keyword_do) {
|
---|
| 3424 | if (this.lpar_beg != 0 && this.lpar_beg == this.paren_nest) {
|
---|
| 3425 | this.lpar_beg = 0;
|
---|
| 3426 | this.paren_nest--;
|
---|
| 3427 | return MrbTokens.keyword_do_LAMBDA;
|
---|
| 3428 | }
|
---|
| 3429 | if (COND_P() != 0) return MrbTokens.keyword_do_cond;
|
---|
| 3430 | if (CMDARG_P() != 0 && state != mrb_lex_state_enum.EXPR_CMDARG)
|
---|
| 3431 | return MrbTokens.keyword_do_block;
|
---|
| 3432 | if (state == mrb_lex_state_enum.EXPR_ENDARG || state == mrb_lex_state_enum.EXPR_BEG)
|
---|
| 3433 | return MrbTokens.keyword_do_block;
|
---|
| 3434 | return MrbTokens.keyword_do;
|
---|
| 3435 | }
|
---|
| 3436 | if (state == mrb_lex_state_enum.EXPR_BEG || state == mrb_lex_state_enum.EXPR_VALUE)
|
---|
| 3437 | return kw.id0;
|
---|
| 3438 | else {
|
---|
| 3439 | if (kw.id0 != kw.id1)
|
---|
| 3440 | this.lstate = mrb_lex_state_enum.EXPR_BEG;
|
---|
| 3441 | return kw.id1;
|
---|
| 3442 | }
|
---|
| 3443 | }
|
---|
| 3444 | }
|
---|
| 3445 |
|
---|
| 3446 | if (IS_BEG() || this.lstate == mrb_lex_state_enum.EXPR_DOT || IS_ARG()) {
|
---|
| 3447 | if (cmd_state) {
|
---|
| 3448 | this.lstate = mrb_lex_state_enum.EXPR_CMDARG;
|
---|
| 3449 | }
|
---|
| 3450 | else {
|
---|
| 3451 | this.lstate = mrb_lex_state_enum.EXPR_ARG;
|
---|
| 3452 | }
|
---|
| 3453 | }
|
---|
| 3454 | else if (this.lstate == mrb_lex_state_enum.EXPR_FNAME) {
|
---|
| 3455 | this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
|
---|
| 3456 | }
|
---|
| 3457 | else {
|
---|
| 3458 | this.lstate = mrb_lex_state_enum.EXPR_END;
|
---|
| 3459 | }
|
---|
| 3460 | break;
|
---|
| 3461 | }
|
---|
| 3462 | {
|
---|
| 3463 | mrb_sym ident = intern_cstr(tok());
|
---|
| 3464 |
|
---|
| 3465 | yylval.id = ident;
|
---|
| 3466 | #if false
|
---|
| 3467 | if (last_state != mrb_lex_state_enum.EXPR_DOT && islower(tok()[0]) && lvar_defined(ident)) {
|
---|
| 3468 | this.lstate = mrb_lex_state_enum.EXPR_END;
|
---|
| 3469 | }
|
---|
| 3470 | #endif
|
---|
| 3471 | }
|
---|
| 3472 | return result;
|
---|
| 3473 | }
|
---|
| 3474 | }
|
---|
| 3475 |
|
---|
/// <summary>
/// Creates a fresh token value holder for the current file name, resets the
/// lexer fields used at the start of a parse, and runs the generated parser
/// on this instance.
/// </summary>
/// <remarks>
/// Every exception thrown during setup or parsing is caught here: it is
/// reported through yyError as "memory allocation error" and the tree is
/// cleared, so callers never see the exception itself.
/// </remarks>
private void mrb_parser_parse()
{
	yylval = new MrbToken(filename);

	try
	{
		cmd_start = true;

		/* not inside any method definition yet */
		in_single = 0;
		in_def = 0;

		/* no string literal is being scanned */
		lex_strterm = null;

		/* start with the built-in token buffer */
		tokbuf = buf;
		tsiz = MRB_PARSER_TOKBUF_SIZE;

		yyParse(this, null);
	}
	catch (Exception)
	{
		yyError("memory allocation error");
		tree = null;
	}
}
|
---|
| 3494 |
|
---|
/// <summary>
/// Makes <paramref name="f"/> the current source file name, appending it to
/// the file name table when it has not been registered before.
/// </summary>
/// <param name="f">File name to switch to.</param>
public void mrb_parser_set_filename(string f)
{
	/* Line numbering restarts at 1 while the table is still empty, otherwise at 0. */
	this.lineno = (this.filename_table_length > 0) ? 0 : 1;

	/* Reuse the existing slot when this name is already in the table. */
	for (int i = 0; i < this.filename_table_length; ++i) {
		if (this.filename_table[i] == f) {
			this.current_filename_index = i;
			return;
		}
	}

	/*
	 * The name is appended, so its slot is the table length *before* the
	 * push.  (The previous "+ 1" pointed one past the new entry and did not
	 * match the index the lookup loop above returns for the same name.)
	 */
	this.current_filename_index = this.filename_table_length;
	this.filename_table.Push(f);
}
|
---|
| 3511 |
|
---|
/// <summary>
/// Parses the byte buffer <paramref name="s"/> as the contents of
/// <paramref name="filename"/>.
/// </summary>
/// <param name="filename">Name registered as the current source file.</param>
/// <param name="s">Source bytes to read from, starting at offset 0.</param>
public void mrb_parse_nstring(string filename, Uint8Array s)
{
	/* register / select the file name first */
	mrb_parser_set_filename(filename);

	/* install the input buffer and rewind the read position */
	this.s = s;
	sp = 0;

	mrb_parser_parse();
}
|
---|
| 3520 |
|
---|
/// <summary>
/// Parses <paramref name="text"/> with a new <c>MrbParser</c> and returns the
/// body of the resulting top-level scope.
/// </summary>
/// <param name="text">Source text; it is converted with UTF8StringToArray before parsing.</param>
/// <param name="filename">File name to associate with the text.</param>
/// <returns>
/// The scope's body, or null when the parser's tree is not a <c>scope_node</c>
/// (including when it is null).
/// </returns>
public static node parse(string text, string filename = "temporary.rb")
{
	var parser = new MrbParser();
	var source = UTF8StringToArray(text);
	parser.mrb_parse_nstring(filename, source);

	var root = parser.tree as scope_node;
	return (root != null) ? root.body : null;
}
|
---|
| 3530 |
|
---|
/// <summary>
/// Recursively simplifies <paramref name="tree"/> by evaluating the parts that
/// can be computed from <c>IEvaluatable</c> nodes.
/// </summary>
/// <param name="tree">Node to simplify; may be <c>null</c>.</param>
/// <returns>
/// <c>null</c> when <paramref name="tree"/> is <c>null</c>; the node itself
/// when it implements <c>IEvaluatable</c> or is of a kind not handled here;
/// otherwise a node rebuilt from its simplified children (or, for calls on an
/// <c>IEvaluatable</c> receiver, whatever non-null node the receiver's
/// <c>evaluate</c> returns).
/// </returns>
public static node evaluate(node tree)
{
	// parse() can hand back null, and child slots are passed in recursively,
	// so guard before touching tree.p.
	if (tree == null)
		return null;

	var p = tree.p;

	// Already in evaluated form.
	if (tree is IEvaluatable)
		return tree;

	var begin = tree as begin_node;
	if (begin != null) {
		node[] progs = new node[0];
		foreach (var r in begin.progs) {
			progs.Push(evaluate(r));
		}
		// A begin node with exactly one statement collapses to that statement.
		if (progs.Length != 1)
			return new begin_node(p, progs);
		return progs[0];
	}

	var negate = tree as negate_node;
	if (negate != null) {
		var operand = evaluate(negate.n);

		var intOperand = operand as int_node;
		if (intOperand != null) {
			var a = intOperand.to_i();
			var c = UTF8StringToArray((-a).ToString());
			return new int_node(p, c, 10);
		}

		var floatOperand = operand as float_node;
		if (floatOperand != null) {
			var a = floatOperand.to_f();
			var c = UTF8StringToArray((-a).ToString());
			return new float_node(p, c);
		}

		// The operand is not a numeric literal, so the sign cannot be folded in.
		// Keep the original negate node: returning the bare operand (as this
		// code used to) would silently drop the negation.
		return tree;
	}

	var dot2 = tree as dot2_node;
	if (dot2 != null) {
		var a = evaluate(dot2.a);
		var b = evaluate(dot2.b);
		return new dot2_node(p, a, b);
	}

	var dot3 = tree as dot3_node;
	if (dot3 != null) {
		var a = evaluate(dot3.a);
		var b = evaluate(dot3.b);
		return new dot3_node(p, a, b);
	}

	var call = tree as call_node;
	if (call != null) {
		var obj = evaluate(call.obj);
		var args = new node[0];
		foreach (var a in call.args) {
			args.Push(evaluate(a));
		}

		// Let an evaluatable receiver try to compute the call; a null result
		// means it could not, and the call is rebuilt instead.
		var eva = obj as IEvaluatable;
		if (eva != null) {
			var ret = eva.evaluate(p.sym2name(call.method), args);
			if (ret != null)
				return ret;
		}
		return new call_node(p, obj, call.method, args, call.block);
	}

	return tree;
}
|
---|
| 3599 |
|
---|
/// <summary>
/// Renders the parser's content as Ruby source text.
/// </summary>
/// <returns>
/// The text produced by <c>tree.to_ruby</c> when a tree exists; otherwise the
/// raw input buffer converted with <c>UTF8ArrayToString</c>, or an empty
/// string when there is no input buffer either.
/// </returns>
public string to_ruby()
{
	if (tree == null) {
		// No tree: fall back to the raw input, if there is one.
		return (s == null) ? "" : UTF8ArrayToString(s, 0);
	}

	var writer = new ruby_code_cond(filename);
	tree.to_ruby(writer);
	return writer.ToString();
}
|
---|
/// <summary>Kind of the current token (<c>yylval.Kind</c>) as an integer.</summary>
int MrbParser.yyInput.Token => (int)yylval.Kind;
|
---|
| 3613 |
|
---|
/// <summary>Value carried by the current token (<c>yylval.Value</c>).</summary>
object MrbParser.yyInput.Value => yylval.Value;
|
---|
| 3615 |
|
---|
/// <summary>
/// Reads the next token from <c>parser_yylex</c> and records it, together with
/// its text, in <c>yylval</c>.
/// </summary>
/// <returns><c>true</c> when the token returned by the lexer is greater than zero.</returns>
bool MrbParser.yyInput.Advance()
{
	var token = parser_yylex();

	// Take one element beyond toklen() from the token buffer, as the original
	// slice did, before converting it to a string.
	var buffer = tok();
	var end = toklen() + 1;
	var text = MrbParser.UTF8ArrayToString(buffer.SubArray(0, end), 0);
	yylval.SetToken(token, text);

	return token > 0;
}
|
---|
| 3623 |
|
---|
/// <summary>
/// Writes a warning line of the form <c>file(line,column): warning text</c>
/// through <c>App.WriteLine</c>.
/// </summary>
/// <param name="message">Composite format string for the warning text.</param>
/// <param name="expected">Arguments substituted into <paramref name="message"/>.</param>
void yyConsoleOut.yyWarning(string message, object[] expected)
{
	var location = $"{filename}({lineno},{column})";
	var text = String.Format(message, expected);
	App.WriteLine($"{location}: warning {text}");
}
|
---|
| 3628 |
|
---|
/// <summary>
/// Writes an error line of the form <c>file(line,column): error text</c>
/// through <c>App.WriteLine</c>.
/// </summary>
/// <param name="message">Composite format string for the error text.</param>
/// <param name="expected">Arguments substituted into <paramref name="message"/>.</param>
void yyConsoleOut.yyError(string message, object[] expected)
{
	var location = $"{filename}({lineno},{column})";
	var text = String.Format(message, expected);
	App.WriteLine($"{location}: error {text}");
}
|
---|
| 3633 | }
|
---|
| 3634 | }
|
---|