source: EcnlProtoTool/trunk/webapp/webmrbc/MrbScanner.cs@ 270

Last change on this file since 270 was 270, checked in by coas-nagasima, 7 years ago

mruby版ECNLプロトタイピング・ツールを追加

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
  • Property svn:mime-type set to text/x-csharp
File size: 88.4 KB
Line 
1/*
2** parse.y - mruby parser
3**
4** See Copyright Notice in mruby.h
5*/
6using System;
7using System.Collections.Generic;
8using Bridge.Html5;
9
10namespace WebMrbc
11{
/// <summary>
/// Implemented by objects that can evaluate a named method against a list of argument nodes.
/// </summary>
interface IEvaluatable
{
    /// <param name="method">Name of the method to evaluate.</param>
    /// <param name="args">Argument nodes handed to the method.</param>
    /// <returns>The node resulting from the evaluation.</returns>
    node evaluate(string method, node[] args);
}
16
/// <summary>
/// Table entry pairing a name (stored as NUL-terminated UTF-8 bytes) with two token ids
/// and a lexer state. Presumably one reserved-word entry -- confirm against the table user.
/// </summary>
class kwtable
{
    public Uint8Array name;
    public MrbTokens id0;
    public MrbTokens id1;
    public mrb_lex_state_enum state;

    /// <summary>Creates an entry that only carries a name; ids and state keep their defaults.</summary>
    public kwtable(string name)
    {
        this.name = MrbParser.UTF8StringToArray(name);
    }

    /// <summary>Creates a fully populated entry.</summary>
    public kwtable(string name, MrbTokens id0, MrbTokens id1, mrb_lex_state_enum state)
        : this(name)
    {
        this.id0 = id0;
        this.id1 = id1;
        this.state = state;
    }
}
37
/// <summary>
/// Callback the lexer invokes when its current input is exhausted.
/// A negative return value tells the lexer that no further input follows.
/// </summary>
delegate int partial_hook_t(MrbParser p);
39
/// <summary>
/// A lexer token: the file it came from, its kind, its source text and a loosely typed
/// semantic value that the typed accessors below cast on read.
/// </summary>
public class MrbToken
{
    private string m_Filename;
    private MrbTokens m_Kind;
    private string m_Token;
    private object m_Value;

    public MrbToken(string filename)
    {
        m_Filename = filename;
    }

    public string Filename => m_Filename;
    public MrbTokens Kind => m_Kind;
    public object Value => m_Value;

    // Typed views over the same underlying value; each getter casts and therefore
    // throws if the stored value is of a different type.
    public node nd
    {
        get { return (node)m_Value; }
        set { m_Value = value; }
    }

    public mrb_sym id
    {
        get { return (mrb_sym)m_Value; }
        set { m_Value = value; }
    }

    public int num
    {
        get { return (int)m_Value; }
        set { m_Value = value; }
    }

    public stack_type stack
    {
        get { return (stack_type)m_Value; }
        set { m_Value = value; }
    }

    /// <summary>Records the token kind together with its source text.</summary>
    internal void SetToken(MrbTokens kind, string token)
    {
        m_Kind = kind;
        m_Token = token;
    }
}
68
69 public partial class MrbParser : IMrbParser, MrbParser.yyInput, MrbParser.yyConsoleOut
70 {
71 const int MRB_PARSER_TOKBUF_MAX = 65536;
72 const int MRB_PARSER_TOKBUF_SIZE = 256;
73
74 Uint8Array s;
75 int sp;
/// <summary>
/// Name of the file currently being parsed, or the literal "(null)" when
/// <c>current_filename_index</c> is not below the length of the filename table.
/// </summary>
public string filename {
    get {
        return current_filename_index < filename_table.Length
            ? filename_table[current_filename_index]
            : "(null)";
    }
}
84 public int lineno { get; set; }
85 public int column { get; set; }
86
87 mrb_lex_state_enum lstate;
88 node lex_strterm;
89
90 stack_type cond_stack;
91 stack_type cmdarg_stack;
92 int paren_nest;
93 int lpar_beg;
94 int in_def, in_single;
95 bool cmd_start;
96 locals_node locals;
97
98 node pb;
99 Uint8Array buf = new Uint8Array(MRB_PARSER_TOKBUF_SIZE);
100 Uint8Array tokbuf;
101 int tidx;
102 int tsiz;
103
104 node all_heredocs;
105 node heredocs_from_nextline;
106 node parsing_heredoc;
107 node lex_strterm_before_heredoc;
108
109 internal node tree;
110
111 string[] filename_table = new string[0];
112 int filename_table_length { get { return filename_table.Length; } }
113 public int current_filename_index;
114
115 internal partial_hook_t partial_hook;
116 internal object partial_data;
117
118 MrbToken yylval;
119
/// <summary>Creates a parser that acts as its own console sink (<c>yyConsole</c>).</summary>
public MrbParser()
{
    yyConsole = this;
}
124
/// <summary>
/// Counterpart of the C <c>mrb_assert</c> macro: fails fast when an internal invariant is broken.
/// </summary>
/// <param name="cond">Condition that must hold.</param>
/// <exception cref="InvalidOperationException">
/// Thrown when <paramref name="cond"/> is false. It derives from <see cref="Exception"/>,
/// so existing <c>catch (Exception)</c> handlers keep working.
/// </exception>
void mrb_assert(bool cond)
{
    if (!cond)
        throw new InvalidOperationException("mrb_assert failed: parser invariant violated");
}
129
130 const int ERANGE = 1;
131 int errno;
/// <summary>
/// Compares <paramref name="len"/> bytes of <paramref name="a"/> starting at
/// <paramref name="aofs"/> with the bytes of <paramref name="b"/> starting at <paramref name="bofs"/>.
/// </summary>
/// <returns>
/// 0 when all bytes match; otherwise the difference of the first mismatching pair
/// (negative when the byte from <paramref name="a"/> is smaller).
/// </returns>
static int memcmp(Uint8Array a, int aofs, Uint8Array b, int bofs, int len)
{
    for (int i = 0; i < len; i++) {
        // The offsets must be applied on both sides; comparing a[i] with b[i]
        // would always inspect the start of each buffer.
        int diff = a[aofs + i] - b[bofs + i];
        if (diff != 0)
            return diff;
    }
    return 0;
}
142
/// <summary>True for the ASCII decimal digits '0'..'9'.</summary>
static bool isdigit(int c) => '0' <= c && c <= '9';

/// <summary>True for ASCII letters and decimal digits.</summary>
static bool isalnum(int c)
{
    if ('0' <= c && c <= '9') return true;
    if ('A' <= c && c <= 'Z') return true;
    return 'a' <= c && c <= 'z';
}

/// <summary>Maps ASCII upper-case letters to lower case; every other value is returned unchanged.</summary>
static int tolower(int c)
{
    bool isUpper = 'A' <= c && c <= 'Z';
    return isUpper ? c - 'A' + 'a' : c;
}
159
/// <summary>
/// Counts the bytes from <paramref name="ofs"/> up to (not including) the first 0 byte,
/// or up to the end of the array when no 0 byte follows.
/// </summary>
static int strlen(Uint8Array a, int ofs)
{
    int pos = ofs;
    while (pos < a.Length && a[pos] != 0)
        pos++;
    return pos - ofs;
}
169
/// <summary>
/// C-style <c>strncmp</c>: compares at most <paramref name="len"/> bytes of two NUL-terminated
/// byte strings that start at <paramref name="aofs"/> and <paramref name="bofs"/>.
/// </summary>
/// <returns>
/// 0 when the strings are equal within <paramref name="len"/> bytes; a negative value when
/// <paramref name="a"/> sorts first (including when it ends first); a positive value otherwise.
/// </returns>
static int strncmp(Uint8Array a, int aofs, Uint8Array b, int bofs, int len)
{
    for (int i = 0; i < len; i++) {
        int ai = aofs + i;
        int bi = bofs + i;
        // An index past the end of a typed array does not yield a byte, so treat it
        // as the terminator instead of reading it.
        int ca = ai < a.Length ? a[ai] : 0;
        int cb = bi < b.Length ? b[bi] : 0;

        // A terminator is just the smallest byte: the string that ends first sorts
        // first, which gives the same sign convention as every other mismatch.
        if (ca != cb)
            return ca - cb;
        if (ca == 0)
            return 0;
    }
    return 0;
}
190
/// <summary>
/// Finds the first index at or after <paramref name="ofs"/> whose byte equals <paramref name="c"/>.
/// </summary>
/// <returns>
/// The matching index. When nothing matches, the scan position is returned, which is the
/// array length whenever <paramref name="ofs"/> is inside the array.
/// </returns>
static int strchr(Uint8Array s, int ofs, int c)
{
    int pos = ofs;
    while (pos < s.Length && s[pos] != c)
        pos++;
    return pos;
}
200
/// <summary>
/// Returns an independent, NUL-terminated copy of up to <paramref name="len"/> bytes of
/// <paramref name="s"/> starting at <paramref name="ofs"/>.
/// </summary>
/// <remarks>
/// <c>SubArray(begin, end)</c> takes an exclusive end index and returns a view on the same
/// buffer, so <c>s.SubArray(ofs, len + 1)</c> selected the wrong range for any non-zero
/// offset and aliased the source. The bytes are copied explicitly instead.
/// </remarks>
public static Uint8Array strndup(Uint8Array s, int ofs, int len)
{
    // Never read past the end of the source.
    int count = Math.Max(0, Math.Min(len, s.Length - ofs));
    var copy = new Uint8Array(count + 1);
    for (int i = 0; i < count; i++)
        copy[i] = s[ofs + i];
    copy[count] = 0; // terminator
    return copy;
}
205
/// <summary>
/// Returns an independent copy of the NUL-terminated byte string that starts at
/// <paramref name="ofs"/>, including its terminator.
/// </summary>
/// <remarks>
/// <c>SubArray(begin, end)</c> takes an exclusive end index and returns a view on the same
/// buffer, so the previous <c>s.SubArray(ofs, strlen + 1)</c> was only correct for
/// <c>ofs == 0</c> and aliased the source. The bytes are copied explicitly instead.
/// </remarks>
public static Uint8Array strdup(Uint8Array s, int ofs)
{
    int len = strlen(s, ofs);
    var copy = new Uint8Array(len + 1);
    for (int i = 0; i < len; i++)
        copy[i] = s[ofs + i];
    copy[len] = 0; // terminator
    return copy;
}
210
/// <summary>
/// Renders one byte as a backslash escape sequence.
/// </summary>
/// <param name="c">Byte to render.</param>
/// <returns>
/// A named escape (<c>\n</c>, <c>\t</c>, <c>\e</c>, ...) where one exists, otherwise
/// <c>\xHH</c> with exactly two upper-case hex digits.
/// </returns>
private static string escape(byte c)
{
    switch ((char)c) {
    // NOTE(review): a backslash maps to a single backslash, i.e. it is not doubled.
    // Kept as-is; confirm whether any caller passes printable bytes here.
    case '\\': return "\\";
    case '\n': return "\\n";
    case '\t': return "\\t";
    case '\r': return "\\r";
    case '\f': return "\\f";
    case '\v': return "\\v";
    case '\a': return "\\a";
    // ESC is 0x1B (decimal 27); '\x27' is the apostrophe.
    case '\x1b': return "\\e";
    case '\b': return "\\b";
    case ' ': return "\\s";
    case '\0': return "\\0";
    // Two digits always, so that "\x1" followed by a literal hex digit cannot be misread.
    default: return $"\\x{c:X2}";
    }
}
228
/// <summary>
/// Decodes UTF-8 bytes starting at <paramref name="idx"/> into a string, discarding the
/// "escaping was needed" flag reported by <c>UTF8ArrayToStringEsc</c>.
/// </summary>
internal static string UTF8ArrayToString(Uint8Array data, int idx)
{
    bool ignored;
    var text = UTF8ArrayToStringEsc(data, idx, out ignored);
    return text;
}
234
/// <summary>
/// Decodes UTF-8 bytes starting at <paramref name="idx"/> into a string. Bytes that are not
/// printable ASCII and not part of a well-formed multi-byte sequence are emitted as escape
/// sequences.
/// </summary>
/// <param name="data">Source bytes; one trailing 0 byte is treated as a terminator and skipped.</param>
/// <param name="idx">Index of the first byte to decode.</param>
/// <param name="esc">Set to true when at least one byte had to be escaped.</param>
/// <returns>The decoded (and, where necessary, escaped) text.</returns>
internal static string UTF8ArrayToStringEsc(Uint8Array data, int idx, out bool esc)
{
    var str = "";
    // c: bytes collected for the sequence in progress; t: its total length (0 = none).
    int c = 0, t = 0, end = data.Length;
    var temp = new byte[6];

    esc = false;
    if (end > 0 && data[end - 1] == '\x0')
        end--;

    for (int i = idx; i < end; i++) {
        var d = data[i];

        if (t == 0) {
            // 1-byte code
            if ((d & 0x80) == 0) {
                if (d >= 0x20 && d < 0x7F) {
                    // printable
                    str += (char)d;
                }
                else {
                    // not printable: escape
                    esc = true;
                    str += escape(d);
                }
                continue;
            }

            // Lead byte of a 2- to 6-byte code.
            if ((d & 0xE0) == 0xC0) t = 2;
            else if ((d & 0xF0) == 0xE0) t = 3;
            else if ((d & 0xF8) == 0xF0) t = 4;
            else if ((d & 0xFC) == 0xF8) t = 5;
            else if ((d & 0xFE) == 0xFC) t = 6;
            else {
                // Neither ASCII nor a lead byte: escape
                esc = true;
                str += escape(d);
                continue;
            }
            temp[0] = d;
            c = 1;
            continue;
        }

        if ((d & 0xC0) != 0x80) {
            // The sequence was cut short: escape what was collected so far ...
            esc = true;
            for (int j = 0; j < c; j++)
                str += escape(temp[j]);
            t = 0;
            c = 0;
            // ... and look at the interrupting byte again as the start of new input.
            // Previously this byte was dropped from the output.
            i--;
            continue;
        }

        temp[c++] = d;
        if (c < t)
            continue;

        // Sequence complete: assemble the code point.
        switch (t) {
        case 2:
            str += ConvertFromUtf32(((temp[0] & 0x1F) << 6) | (temp[1] & 0x3F));
            break;
        case 3:
            str += ConvertFromUtf32(((temp[0] & 0x0F) << 12) | ((temp[1] & 0x3F) << 6) | (temp[2] & 0x3F));
            break;
        case 4:
            str += ConvertFromUtf32(((temp[0] & 0x07) << 18) | ((temp[1] & 0x3F) << 12) | ((temp[2] & 0x3F) << 6) | (temp[3] & 0x3F));
            break;
        case 5:
            str += ConvertFromUtf32(((temp[0] & 0x03) << 24) | ((temp[1] & 0x3F) << 18) | ((temp[2] & 0x3F) << 12) | ((temp[3] & 0x3F) << 6) | (temp[4] & 0x3F));
            break;
        case 6:
            str += ConvertFromUtf32(((temp[0] & 0x01) << 30) | ((temp[1] & 0x3F) << 24) | ((temp[2] & 0x3F) << 18) | ((temp[3] & 0x3F) << 12) | ((temp[4] & 0x3F) << 6) | (temp[5] & 0x3F));
            break;
        }
        t = 0;
        c = 0;
    }

    // Input ended in the middle of a sequence: escape the leftover bytes.
    if (c > 0)
        esc = true;
    for (int i = 0; i < c; i++) {
        str += escape(temp[i]);
    }

    return str;
}
335
// from Emscripten (http://kripken.github.io/emscripten-site/)
// fromCharCode builds a character from UTF-16 code units, not from a Unicode code point,
// so code points at or above 0x10000 are split into a surrogate pair first.
// See http://unicode.org/faq/utf_bom.html#utf16-3
public static string ConvertFromUtf32(int utf32)
{
    if (utf32 < 0x10000)
        return "" + String.FromCharCode(utf32);

    int offset = utf32 - 0x10000;
    int high = 0xD800 | (offset >> 10);
    int low = 0xDC00 | (offset & 0x3FF);
    return "" + String.FromCharCode(high, low);
}
351
// from Emscripten (http://kripken.github.io/emscripten-site/)
// Copies the string 'str' into the byte array 'outU8Array' starting at 'outIdx',
// encoded as UTF-8 and null-terminated. The copy needs at most str.Length*4+1 bytes.
// Use lengthBytesUTF8() to compute the exact number of bytes (excluding the null terminator).
// Parameters:
//   str: the string to copy.
//   outU8Array: the array to copy to. Each index in this array is one 8-bit element.
//   outIdx: the starting offset in the array.
//   maxBytesToWrite: the maximum number of bytes to write, INCLUDING the null terminator;
//                    with 1 only the terminator is written, with 0 (or less) nothing at all.
// Returns the number of bytes written, EXCLUDING the null terminator.
internal static int stringToUTF8Array(string str, Uint8Array outU8Array, int outIdx, int maxBytesToWrite)
{
    if (!(maxBytesToWrite > 0))
        return 0;

    var startIdx = outIdx;
    var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
    for (var i = 0; i < str.Length; ++i) {
        // str[i] is a UTF-16 code unit. It is widened to int so that a combined
        // surrogate pair (up to 0x10FFFF) is not truncated back to 16 bits.
        int u = str[i];
        if (u >= 0xD800 && u <= 0xDBFF && i + 1 < str.Length) {
            int low = str[i + 1];
            if (low >= 0xDC00 && low <= 0xDFFF) {
                u = 0x10000 + (((u & 0x3FF) << 10) | (low & 0x3FF));
                ++i;
            }
        }
        // An unpaired surrogate falls through and is encoded on its own as three bytes,
        // instead of reading past the end of the string.

        int need = u <= 0x7F ? 1 : u <= 0x7FF ? 2 : u <= 0xFFFF ? 3 : 4;
        if (outIdx + need - 1 >= endIdx) break; // would not leave room for the terminator

        switch (need) {
        case 1:
            outU8Array[outIdx++] = (byte)u;
            break;
        case 2:
            outU8Array[outIdx++] = (byte)(0xC0 | (u >> 6));
            outU8Array[outIdx++] = (byte)(0x80 | (u & 63));
            break;
        case 3:
            outU8Array[outIdx++] = (byte)(0xE0 | (u >> 12));
            outU8Array[outIdx++] = (byte)(0x80 | ((u >> 6) & 63));
            outU8Array[outIdx++] = (byte)(0x80 | (u & 63));
            break;
        default:
            outU8Array[outIdx++] = (byte)(0xF0 | (u >> 18));
            outU8Array[outIdx++] = (byte)(0x80 | ((u >> 12) & 63));
            outU8Array[outIdx++] = (byte)(0x80 | ((u >> 6) & 63));
            outU8Array[outIdx++] = (byte)(0x80 | (u & 63));
            break;
        }
    }
    // Null-terminate the output.
    outU8Array[outIdx] = 0;
    return outIdx - startIdx;
}
421
// from Emscripten (http://kripken.github.io/emscripten-site/)
// Returns the number of bytes the given string takes when encoded as UTF-8,
// EXCLUDING the null terminator byte.
internal static int lengthBytesUTF8(string str)
{
    var len = 0;
    for (var i = 0; i < str.Length; ++i) {
        // str[i] is a UTF-16 code unit. It is widened to int so that a combined
        // surrogate pair (up to 0x10FFFF) is not truncated back to 16 bits.
        int u = str[i];
        if (u >= 0xD800 && u <= 0xDBFF && i + 1 < str.Length) {
            int low = str[i + 1];
            if (low >= 0xDC00 && low <= 0xDFFF) {
                u = 0x10000 + (((u & 0x3FF) << 10) | (low & 0x3FF));
                ++i;
            }
        }
        // An unpaired surrogate is counted on its own (three bytes) instead of
        // indexing past the end of the string.

        if (u <= 0x7F)
            len += 1;
        else if (u <= 0x7FF)
            len += 2;
        else if (u <= 0xFFFF)
            len += 3;
        else
            len += 4; // UTF-16 cannot express anything above 0x10FFFF
    }
    return len;
}
453
/// <summary>
/// Encodes <paramref name="str"/> as UTF-8 into a freshly allocated array that is exactly
/// one byte longer than the encoded text, leaving room for the null terminator.
/// </summary>
internal static Uint8Array UTF8StringToArray(string str)
{
    int capacity = 1 + lengthBytesUTF8(str);
    var bytes = new Uint8Array(capacity);
    stringToUTF8Array(str, bytes, 0, capacity);
    return bytes;
}
461
/// <summary>
/// Parses the bytes of <paramref name="s"/> from <paramref name="ofs"/> to the end of the
/// array as an unsigned decimal integer.
/// </summary>
/// <param name="endptr">Set to null when the whole text parsed, otherwise to <paramref name="s"/>.</param>
/// <param name="_base_">Must be 10; any other base throws.</param>
/// <returns>The parsed value, or 0 when parsing failed.</returns>
static ulong strtoul(Uint8Array s, int ofs, out Uint8Array endptr, int _base_)
{
    if (_base_ != 10) throw new AggregateException();

    // Decode directly from the offset. The former s.SubArray(ofs, s.Length - ofs + 1)
    // passed a length where SubArray expects an end index and cut the text short
    // whenever ofs was greater than zero.
    var text = UTF8ArrayToString(s, ofs);

    ulong result;
    bool parsed = UInt64.TryParse(text, out result);
    endptr = parsed ? null : s;
    return result;
}
474
/// <summary>
/// Parses the bytes of <paramref name="s"/> from <paramref name="ofs"/> to the end of the
/// array as a floating-point number.
/// </summary>
/// <param name="endptr">Set to null when the whole text parsed, otherwise to <paramref name="s"/>.</param>
/// <returns>The parsed value, or 0 when parsing failed.</returns>
static double mrb_float_read(Uint8Array s, int ofs, out Uint8Array endptr)
{
    // Decode directly from the offset. The former s.SubArray(ofs, s.Length - ofs + 1)
    // passed a length where SubArray expects an end index and cut the text short
    // whenever ofs was greater than zero.
    var text = UTF8ArrayToString(s, ofs);

    double result;
    bool parsed = Double.TryParse(text, out result);
    endptr = parsed ? null : s;
    return result;
}
486
/// <summary>True when <paramref name="c"/> is alphanumeric, an underscore, or not ASCII.</summary>
bool identchar(int c)
{
    if (ISALNUM(c)) return true;
    if (c == '_') return true;
    return !ISASCII(c);
}
488
// Bit-stack primitives: the stack value is a bit vector whose lowest bit is the top.

/// <summary>Shifts the stack left and stores the low bit of <paramref name="n"/> on top.</summary>
void BITSTACK_PUSH(ref stack_type stack, uint n) => stack = (stack_type)(((uint)stack << 1) | (n & 1));

/// <summary>Drops the top bit.</summary>
void BITSTACK_POP(ref stack_type stack) => stack = (stack_type)((uint)stack >> 1);

/// <summary>Drops the top bit but ORs it into the new top.</summary>
void BITSTACK_LEXPOP(ref stack_type stack) => stack = (stack_type)(((uint)stack >> 1) | ((uint)stack & 1));

/// <summary>Returns the top bit (0 or 1).</summary>
stack_type BITSTACK_SET_P(ref stack_type stack) => (stack_type)((uint)stack & 1);

// The same operations applied to cond_stack.
void COND_PUSH(uint n) => BITSTACK_PUSH(ref cond_stack, n);
void COND_POP() => BITSTACK_POP(ref cond_stack);
void COND_LEXPOP() => BITSTACK_LEXPOP(ref cond_stack);
stack_type COND_P() => BITSTACK_SET_P(ref cond_stack);

// The same operations applied to cmdarg_stack.
void CMDARG_PUSH(uint n) => BITSTACK_PUSH(ref cmdarg_stack, n);
void CMDARG_POP() => BITSTACK_POP(ref cmdarg_stack);
void CMDARG_LEXPOP() => BITSTACK_LEXPOP(ref cmdarg_stack);
stack_type CMDARG_P() => BITSTACK_SET_P(ref cmdarg_stack);
503
504 string[] syms = new string[0];
505
/// <summary>
/// Returns the symbol id for <paramref name="str"/>, registering the name first when it is
/// not yet known. Ids are the 1-based position of the name in <c>syms</c>.
/// </summary>
private mrb_sym get_sym(string str)
{
    int index = syms.IndexOf(str);
    if (index >= 0)
        return (mrb_sym)(index + 1);

    index = syms.Length;
    syms.Push(str);
    return (mrb_sym)(index + 1);
}
515
/// <summary>
/// Returns the name registered for <paramref name="sym"/>, or the numeric id as text when
/// the id does not refer to a registered name.
/// </summary>
public string sym2name(mrb_sym sym)
{
    int index = (int)sym - 1;
    bool registered = index >= 0 && index < syms.Length;
    if (!registered)
        return ((int)sym).ToString();
    return syms[index];
}
523
/// <summary>Interns the text decoded from <c>s.SubArray(0, len + 1)</c>.</summary>
mrb_sym mrb_intern(Uint8Array s, int len)
{
    var slice = s.SubArray(0, len + 1);
    return get_sym(UTF8ArrayToString(slice, 0));
}

/// <summary>Interns the NUL-terminated text at the start of <paramref name="s"/>.</summary>
mrb_sym intern_cstr(Uint8Array s)
{
    var slice = s.SubArray(0, strlen(s, 0) + 1);
    return get_sym(UTF8ArrayToString(slice, 0));
}

/// <summary>Interns the first <paramref name="len"/> characters of <paramref name="s"/>.</summary>
mrb_sym intern(string s, int len) => get_sym(s.Substring(0, len));

/// <summary>Interns a single character.</summary>
mrb_sym intern_c(char c) => get_sym(c.ToString());
547
/// <summary>Creates a cons cell owned by this parser.</summary>
public node cons(object car, object cdr) => node.cons(this, car, cdr);

// Fixed-arity list builders; each prepends its first argument to the next shorter list.
public node list1(object a) => cons(a, null);

public node list2(object a, object b) => cons(a, list1(b));

public node list3(object a, object b, object c) => cons(a, list2(b, c));

public node list4(object a, object b, object c, object d) => cons(a, list3(b, c, d));

public node list5(object a, object b, object c, object d, object e) => cons(a, list4(b, c, d, e));
577
/// <summary>
/// Appends <paramref name="b"/> to <paramref name="a"/> and returns <paramref name="a"/>;
/// when <paramref name="a"/> is null, <paramref name="b"/> itself is returned.
/// </summary>
node append(node a, node b)
{
    if (a != null) {
        a.append(b);
        return a;
    }
    return b;
}

/// <summary>Appends the single element <paramref name="b"/> to list <paramref name="a"/>.</summary>
node push(node a, object b) => append(a, list1(b));
589
590 /* xxx ----------------------------- */
591
/// <summary>Starts a fresh scope chain with no parent and returns the chain that was active.</summary>
locals_node local_switch()
{
    var previous = this.locals;
    this.locals = new locals_node(null);
    return previous;
}

/// <summary>Restores a scope chain previously returned by <c>local_switch</c>.</summary>
void local_resume(locals_node prev)
{
    this.locals = prev;
}

/// <summary>Opens a nested scope whose parent is the current one.</summary>
void local_nest()
{
    this.locals = new locals_node(this.locals);
}

/// <summary>Leaves the innermost scope; does nothing when no scope is active.</summary>
void local_unnest()
{
    if (this.locals == null)
        return;
    this.locals = this.locals.cdr;
}

/// <summary>True when <paramref name="sym"/> is listed in the current scope or any parent scope.</summary>
bool local_var_p(mrb_sym sym)
{
    for (locals_node scope = this.locals; scope != null; scope = scope.cdr) {
        if (scope.symList.Contains(sym))
            return true;
    }
    return false;
}

/// <summary>Adds <paramref name="sym"/> to the innermost scope without checking for duplicates.</summary>
void local_add_f(mrb_sym sym)
{
    if (this.locals == null)
        return;
    this.locals.push(sym);
}

/// <summary>Adds <paramref name="sym"/> to the innermost scope unless some active scope already lists it.</summary>
void local_add(mrb_sym sym)
{
    if (local_var_p(sym))
        return;
    local_add_f(sym);
}

/// <summary>Symbols of the innermost scope, or null when no scope is active.</summary>
public mrb_sym[] locals_node()
{
    if (this.locals == null)
        return null;
    return this.locals.symList;
}
646
// Node factories. Each one forwards its arguments, together with this parser,
// to the constructor of the matching node class.

/* (:scope (vars..) (prog...)) */
scope_node new_scope(node body) => new scope_node(this, body);

/* (:begin prog...) */
begin_node new_begin(node body) => new begin_node(this, body);

/* Returns its argument unchanged. */
node newline_node(node n) => n;

/* (:rescue body rescue else) */
rescue_node new_rescue(node body, node resq, node els) => new rescue_node(this, body, resq, els);

/* Rescue node whose only rescue clause is (nil nil resq) and which has no else part. */
rescue_node new_mod_rescue(node body, node resq)
{
    var clause = list3(null, null, resq);
    return new_rescue(body, list1(clause), null);
}

/* (:ensure body ensure) */
ensure_node new_ensure(node a, node b) => new ensure_node(this, a, b);

/* (:nil) */
nil_node new_nil() => new nil_node(this);

/* (:true) */
true_node new_true() => new true_node(this);

/* (:false) */
false_node new_false() => new false_node(this);

/* (:alias new old) */
alias_node new_alias(mrb_sym a, mrb_sym b) => new alias_node(this, a, b);

/* (:if cond then else) */
if_node new_if(node a, node b, node c, bool inline = false) => new if_node(this, a, b, c, inline);

/* (:unless cond then else) */
unless_node new_unless(node a, node b, node c) => new unless_node(this, a, b, c);

/* (:while cond body) */
while_node new_while(node a, node b) => new while_node(this, a, b);

/* (:until cond body) */
until_node new_until(node a, node b) => new until_node(this, a, b);

/* (:for var obj body) */
for_node new_for(node v, node o, node b) => new for_node(this, v, o, b);

/* (:case a ((when ...) body) ((when...) body)) */
case_node new_case(node a, node b) => new case_node(this, a, b);

/* (:postexe a) */
postexe_node new_postexe(node a) => new postexe_node(this, a);

/* (:self) */
internal self_node new_self() => new self_node(this);
752
// Node factories. Each one forwards its arguments, together with this parser,
// to the constructor of the matching node class.

/* (:call a b c) */
call_node new_call(node a, mrb_sym b, node c, MrbTokens pass) => new call_node(this, a, b, c, pass);

/* (:fcall self mid args) */
fcall_node new_fcall(mrb_sym b, node c) => new fcall_node(this, b, c);

/* (:super . c) */
super_node new_super(node c) => new super_node(this, c);

/* (:zsuper) */
zsuper_node new_zsuper() => new zsuper_node(this);

/* (:yield . c) */
yield_node new_yield(node c) => new yield_node(this, c);

/* (:return . c) */
return_node new_return(node c) => new return_node(this, c);

/* (:break . c) */
break_node new_break(node c) => new break_node(this, c);

/* (:next . c) */
next_node new_next(node c) => new next_node(this, c);

/* (:redo) */
redo_node new_redo() => new redo_node(this);

/* (:retry) */
retry_node new_retry() => new retry_node(this);

/* (:dot2 a b) */
dot2_node new_dot2(node a, node b) => new dot2_node(this, a, b);

/* (:dot3 a b) */
dot3_node new_dot3(node a, node b) => new dot3_node(this, a, b);

/* (:colon2 b c) */
colon2_node new_colon2(node b, mrb_sym c) => new colon2_node(this, b, c);

/* (:colon3 . c) */
colon3_node new_colon3(mrb_sym c) => new colon3_node(this, c);

/* (:and a b) */
and_node new_and(node a, node b) => new and_node(this, a, b);

/* (:or a b) */
or_node new_or(node a, node b) => new or_node(this, a, b);

/* (:array a...) */
array_node new_array(node a) => new array_node(this, a);

/* (:splat . a) */
splat_node new_splat(node a) => new splat_node(this, a);

/* (:hash (k . v) (k . v)...) */
hash_node new_hash(node a) => new hash_node(this, a);

/* (:sym . a) */
sym_node new_sym(mrb_sym sym) => new sym_node(this, sym);
872
/// <summary>
/// Interns the text of a string node. A <c>str_node</c> supplies its bytes and length
/// directly; any other node is expected to hold them as the car and cdr of its cdr cell.
/// </summary>
mrb_sym new_strsym(node str)
{
    var literal = str as str_node;
    if (literal != null)
        return mrb_intern(literal.str, literal.len);

    var payload = (node)str.cdr;
    var bytes = (Uint8Array)payload.car;
    var length = (int)payload.cdr;
    return mrb_intern(bytes, length);
}
889
// Node factories. Each one forwards its arguments, together with this parser,
// to the constructor of the matching node class.

/* (:lvar . a) */
lvar_node new_lvar(mrb_sym sym) => new lvar_node(this, sym);

/* (:gvar . a) */
gvar_node new_gvar(mrb_sym sym) => new gvar_node(this, sym);

/* (:ivar . a) */
ivar_node new_ivar(mrb_sym sym) => new ivar_node(this, sym);

/* (:cvar . a) */
cvar_node new_cvar(mrb_sym sym) => new cvar_node(this, sym);

/* (:const . a) */
const_node new_const(mrb_sym sym) => new const_node(this, sym);

/* (:undef a...) */
undef_node new_undef(mrb_sym sym) => new undef_node(this, sym);

/* (:class class super body) */
class_node new_class(node c, node s, node b) => new class_node(this, c, s, b);

/* (:sclass obj body) */
sclass_node new_sclass(node o, node b) => new sclass_node(this, o, b);

/* (:module module body) */
module_node new_module(node m, node b) => new module_node(this, m, b);

/* (:def m lv (arg . body)) */
def_node new_def(mrb_sym m, node a, node b) => new def_node(this, m, a, b);

/* (:sdef obj m lv (arg . body)) */
sdef_node new_sdef(node o, mrb_sym m, node a, node b) => new sdef_node(this, o, m, a, b);

/* (:arg . sym) */
arg_node new_arg(mrb_sym sym) => new arg_node(this, sym);
961
/* Builds the argument list (m o r m2 . b):  */
/*   m:  (a b c)              leading mandatory arguments   */
/*   o:  ((a . e1) (b . e2))  optional arguments            */
/*   r:  a                    rest argument                 */
/*   m2: (a b c)              trailing mandatory arguments  */
/*   b:  a                    block argument                */
node new_args(node m, node opt, mrb_sym rest, node m2, mrb_sym blk)
{
    // Built from the tail: the last cell pairs m2 with blk.
    return cons(m, cons(opt, cons(rest, cons(m2, blk))));
}
977
// Node factories. Each one forwards its arguments, together with this parser,
// to the constructor of the matching node class.

/* (:block_arg . a) */
block_arg_node new_block_arg(node a) => new block_arg_node(this, a);

/* (:block arg body) */
block_node new_block(node a, node b, bool brace) => new block_node(this, a, b, brace);

/* (:lambda arg body) */
lambda_node new_lambda(node a, node b) => new lambda_node(this, a, b);

/* (:asgn lhs rhs) */
asgn_node new_asgn(node a, node b) => new asgn_node(this, a, b);

/* (:masgn mlhs=(pre rest post) mrhs) */
masgn_node new_masgn(node a, node b) => new masgn_node(this, a, b);

/* op_asgn node built from lhs, operator symbol and rhs */
op_asgn_node new_op_asgn(node a, mrb_sym op, node b) => new op_asgn_node(this, a, op, b);

/* (:int . i) */
int_node new_int(Uint8Array s, int _base) => new int_node(this, s, _base);

/* Same, taking the digits as a string that is first encoded as UTF-8. */
int_node new_int(string s, int _base) => new_int(MrbParser.UTF8StringToArray(s), _base);

/* (:float . i) */
float_node new_float(Uint8Array s) => new float_node(this, s);

/* (:str . (s . len)) */
str_node new_str(Uint8Array s, int len) => new str_node(this, s, len);

/* Same, taking the text as a string that is first encoded as UTF-8. */
str_node new_str(string s, int len) => new_str(MrbParser.UTF8StringToArray(s), len);

/* (:dstr . a) */
internal dstr_node new_dstr(node a) => new dstr_node(this, a);

/* xstr node built from bytes and length */
xstr_node new_xstr(Uint8Array s, int len) => new xstr_node(this, s, len);

/* dxstr node */
dxstr_node new_dxstr(node a) => new dxstr_node(this, a);

/* (:dsym . a) */
dsym_node new_dsym(node a) => new dsym_node(this, a);

/* regx node built from three byte strings */
regx_node new_regx(Uint8Array p1, Uint8Array p2, Uint8Array p3) => new regx_node(this, p1, p2, p3);

/* (:dregx . a) */
dregx_node new_dregx(node a, node b) => new dregx_node(this, a, b);

/* (:backref . n) */
back_ref_node new_back_ref(int n) => new back_ref_node(this, n);

/* (:nthref . n) */
nth_ref_node new_nth_ref(int n) => new nth_ref_node(this, n);

/* (:heredoc . a) */
heredoc_node new_heredoc() => new heredoc_node(this);

/* Intentionally does nothing. */
void new_bv(mrb_sym id)
{
}

literal_delim_node new_literal_delim() => new literal_delim_node(this);

/* (:words . a) */
words_node new_words(node a) => new words_node(this, a);

/* (:symbols . a) */
symbols_node new_symbols(node a) => new symbols_node(this, a);

/* NOTE(review): the length passed here is the array length, which includes the
   null terminator added by UTF8StringToArray -- confirm that filename_node expects that. */
filename_node new_filename(string s)
{
    var bytes = MrbParser.UTF8StringToArray(s);
    return new filename_node(this, bytes, bytes.Length);
}

lineno_node new_lineno(int lineno) => new lineno_node(this, lineno);
1121
1122 /* xxx ----------------------------- */
1123
/* (:call a op) -- call of operator method m on recv without arguments */
node call_uni_op(node recv, string m)
{
    var op = intern_cstr(MrbParser.UTF8StringToArray(m));
    return new_call(recv, op, null, (MrbTokens)1);
}

/* (:call a op b) -- call of operator method m on recv with the single argument arg1 */
node call_bin_op(node recv, string m, node arg1)
{
    var op = intern_cstr(MrbParser.UTF8StringToArray(m));
    var args = list1(list1(arg1));
    return new_call(recv, op, args, (MrbTokens)1);
}
1135
/// <summary>
/// Stores block <paramref name="b"/> in the cdr of argument list <paramref name="a"/>.
/// An error is reported when that slot is already occupied; the block is stored regardless.
/// </summary>
void args_with_block(node a, node b)
{
    if (b == null)
        return;

    if (a.cdr != null) {
        yyError("both block arg and actual block given");
    }
    a.cdr = b;
}
1145
/// <summary>
/// Attaches block <paramref name="b"/> to <paramref name="a"/> when it is a super, zsuper,
/// call or fcall node; every other node kind is left untouched.
/// </summary>
void call_with_block(node a, node b)
{
    var kind = (node_type)a.car;

    if (kind == node_type.NODE_SUPER) {
        ((super_node)a).add_block(b);
    }
    else if (kind == node_type.NODE_ZSUPER) {
        ((zsuper_node)a).add_block(b);
    }
    else if (kind == node_type.NODE_CALL) {
        ((call_node)a).add_block(b);
    }
    else if (kind == node_type.NODE_FCALL) {
        ((fcall_node)a).add_block(b);
    }
}
1165
/// <summary>Wraps <paramref name="n"/> in a negate node.</summary>
node negate_lit(node n) => new negate_node(this, n);

/// <summary>Returns its argument unchanged.</summary>
static node cond(node n) => n;
1175
/// <summary>
/// Turns a call-argument node into a value node: a single argument is returned as-is,
/// several arguments are wrapped in an array node. A block argument is an error and yields null.
/// </summary>
node ret_args(node n)
{
    if (n.cdr != null) {
        yyError("block argument should not be given");
        return null;
    }

    var args = (node)n.car;
    if (args.cdr == null)
        return (node)args.car;
    return new_array(args);
}
1185
/// <summary>Registers the variable as a local when <paramref name="lhs"/> is a local-variable node.</summary>
void assignable(node lhs)
{
    var local = lhs as lvar_node;
    if (local == null)
        return;
    local_add(local.name);
}

/// <summary>
/// Resolves a reference: a local-variable node whose name is not a known local becomes a
/// call without arguments; anything else is returned unchanged.
/// </summary>
node var_reference(node lhs)
{
    var local = lhs as lvar_node;
    if (local == null || local_var_p(local.name))
        return lhs;
    return new_fcall(local.name, null);
}
1208
/// <summary>Builds the string-terminator record (type 0 paren . term).</summary>
node new_strterm(mrb_string_type type, int term, int paren)
{
    var tail = cons(paren, term);
    return cons(type, cons(0, tail));
}

/// <summary>Clears the current string terminator.</summary>
void end_strterm()
{
    this.lex_strterm = null;
}
1218
/// <summary>
/// Info record of the heredoc currently being parsed, or null when none is being parsed.
/// </summary>
parser_heredoc_info parsing_heredoc_inf()
{
    node current = this.parsing_heredoc;
    if (current != null) {
        /* mrb_assert(nd.car.car == node_type.NODE_HEREDOC); */
        return ((heredoc_node)current.car).info;
    }
    return null;
}
1227
/// <summary>
/// Moves the heredocs queued for the next line into the list of all heredocs and makes the
/// first of them the one being parsed. When a heredoc is already being parsed, the queued
/// ones are spliced in directly in front of it.
/// </summary>
void heredoc_treat_nextline()
{
    var queued = this.heredocs_from_nextline;
    if (queued == null)
        return;

    if (this.parsing_heredoc == null) {
        // Nothing in progress: start parsing the queued heredocs and append them to the end.
        this.parsing_heredoc = queued;
        this.lex_strterm_before_heredoc = this.lex_strterm;
        this.lex_strterm = new_strterm(parsing_heredoc_inf().type, 0, 0);

        node tail = this.all_heredocs;
        if (tail == null) {
            this.all_heredocs = this.parsing_heredoc;
        }
        else {
            while (tail.cdr != null)
                tail = (node)tail.cdr;
            tail.cdr = this.parsing_heredoc;
        }
    }
    else {
        // Find the last cell of the queued list.
        node queuedTail = queued;
        while (queuedTail.cdr != null)
            queuedTail = (node)queuedTail.cdr;

        node cursor = this.all_heredocs;
        mrb_assert(cursor != null);
        if (cursor == this.parsing_heredoc) {
            // The heredoc in progress heads the list: the queued ones become the new head.
            queuedTail.cdr = cursor;
            this.all_heredocs = queued;
        }
        else {
            // Otherwise link them in after the cell that precedes the heredoc in progress.
            while (cursor.cdr != this.parsing_heredoc) {
                cursor = (node)cursor.cdr;
                mrb_assert(cursor != null);
            }
            queuedTail.cdr = cursor.cdr;
            cursor.cdr = queued;
        }
        this.parsing_heredoc = queued;
    }

    this.heredocs_from_nextline = null;
}
1271
/// <summary>
/// Finishes the current heredoc and advances to the next one. When none is left, the lexer
/// returns to the state it was in before heredoc parsing started.
/// </summary>
void heredoc_end()
{
    this.parsing_heredoc = (node)this.parsing_heredoc.cdr;

    if (this.parsing_heredoc != null) {
        /* next heredoc */
        this.lex_strterm.car = parsing_heredoc_inf().type;
        return;
    }

    this.lstate = mrb_lex_state_enum.EXPR_BEG;
    this.cmd_start = true;
    end_strterm();
    this.lex_strterm = (node)this.lex_strterm_before_heredoc;
    this.lex_strterm_before_heredoc = null;
}
1287
/// <summary>True when the current string terminator's type shares at least one bit with <paramref name="str_func"/>.</summary>
bool is_strterm_type(mrb_string_type str_func)
{
    int current = (int)lex_strterm.car;
    int mask = (int)str_func;
    return (current & mask) != 0;
}
1292
1293 static Uint8Array begin = MrbParser.UTF8StringToArray("begin");
1294 static Uint8Array end = MrbParser.UTF8StringToArray("\n=end");
1295
/// <summary>
/// Puts <paramref name="c"/> back in front of the input. The column counter is only
/// stepped back for real characters, i.e. non-negative values.
/// </summary>
void pushback(int c)
{
    if (c >= 0)
        column--;
    this.pb = cons(c, this.pb);
}
1303
/// <summary>Reports an attempt to assign to an nth-reference or back-reference variable.</summary>
void backref_error(node n)
{
    var kind = (node_type)n.car;

    switch (kind) {
    case node_type.NODE_NTH_REF:
        yyError("can't set variable ${0}", ((int)n.cdr).ToString());
        break;
    case node_type.NODE_BACK_REF:
        yyError("can't set variable ${0}", ((char)n.cdr).ToString());
        break;
    default:
        //mrb_bug(mrb, "Internal error in backref_error() : n=>car == %S", mrb_fixnum_value(c));
        break;
    }
}
1318
/*
 * Raise "void value expression" when n is a jump node (break, return,
 * next, redo or retry).  A null node and every other node type are
 * accepted silently.
 */
void void_expr_error(node n)
{
    if (n == null) return;

    var kind = (node_type)n.car;
    bool is_jump = kind == node_type.NODE_BREAK
        || kind == node_type.NODE_RETURN
        || kind == node_type.NODE_NEXT
        || kind == node_type.NODE_REDO
        || kind == node_type.NODE_RETRY;

    if (is_jump) {
        yyError("void value expression");
    }
}
1334
/*
 * Read the next input character.
 *
 * Characters pushed back earlier (list pb) are served first, then the
 * bytes of the in-memory source buffer s.  A "\r\n" pair is collapsed
 * into a single '\n'; a lone '\r' is returned unchanged.
 *
 * Returns the character (0..255 when read from s), or a negative marker
 * once the buffer is exhausted:
 *   -1  end of input (no partial_hook, or the hook returned < 0)
 *   -2  end of one file, the hook supplied more input
 *
 * The column counter is advanced for every non-negative value returned
 * by the underlying read.
 */
int nextc()
{
    int c;

    if (this.pb != null) {
        /* pop the most recently pushed-back value */
        c = (int)this.pb.car;
        this.pb = (node)this.pb.cdr;
    }
    else if (this.s != null && this.sp < this.s.Length) {
        c = (byte)this.s[sp++];
    }
    else {
        /* source exhausted: let the partial hook decide whether more follows */
        if (this.partial_hook == null) return -1;
        if (this.partial_hook(this) < 0)
            return -1; /* end of program(s) */
        return -2; /* end of a file in the program files */
    }

    if (c >= 0) {
        this.column++;
    }
    if (c == '\r') {
        c = nextc();
        if (c != '\n') {
            /* lone CR: keep the following character for the next read */
            pushback(c);
            return '\r';
        }
        return c;
    }
    return c;
}
1385
/*
 * Consume input up to and including the first occurrence of term, or
 * until nextc() reports end of input (a negative value).
 */
void skip(char term)
{
    int c;

    do {
        c = nextc();
    } while (c >= 0 && c != term);
}
1396
/*
 * Look ahead n characters without consuming them: reads n + 1 values
 * with nextc(), then places them back in front of the pushback list so
 * the next reads see them again.  Returns the last value read.
 *
 * If nextc() returns -1 the function returns -1 immediately; the values
 * read before that point are not pushed back.
 */
int peekc_n(int n)
{
    node seen = null;
    int remaining = n;
    int c0;

    for (;;) {
        c0 = nextc();
        if (c0 == -1) return c0; /* do not skip partial EOF */
        if (c0 >= 0) --column;   /* peeking must not move the column */
        seen = push(seen, c0);
        if (remaining-- == 0) break;
    }

    if (this.pb == null) {
        this.pb = seen;
    }
    else {
        this.pb = append(seen, this.pb);
    }
    return c0;
}
1416
/*
 * True when the value obtained from peekc_n(n) equals c and c is a real
 * character (c >= 0).  The look-ahead is always performed, even for a
 * negative c.
 */
bool peek_n(int c, int n)
{
    int ahead = peekc_n(n);

    return ahead == c && c >= 0;
}

/* True when the very next character is c; shorthand for peek_n(c, 0). */
bool peek(int c)
{
    return peek_n(c, 0);
}
1426
/*
 * True when the unread part of the source buffer starts with the byte
 * string found at offset p of s.  The comparison looks at this.s from
 * this.sp onwards; the pushback list is not consulted.  Returns false
 * when there is no buffer or fewer bytes remain than the pattern needs.
 */
bool peeks(Uint8Array s, int p)
{
    int len = strlen(s, p);

    if (this.s == null) return false;
    if (this.sp + len > this.s.Length) return false;
    return memcmp(this.s, this.sp, s, p, len) == 0;
}
1445
/*
 * Skip input up to and including the first occurrence of the byte string
 * found at offset p of s, keeping lineno/column up to date for every
 * newline that is consumed.
 *
 * Returns true when the string was found and consumed, false when the
 * input ended first.  (The previous "return c != 0" was evaluated only
 * when c < 0 and was therefore always true, so a missing terminator
 * could never be reported to the caller.)
 */
bool skips(Uint8Array s, int p)
{
    int c;

    for (;;) {
        /* skip until first char */
        for (;;) {
            c = nextc();
            if (c < 0) return false; /* end of input before the terminator */
            if (c == '\n') {
                this.lineno++;
                this.column = 0;
            }
            if (c == s[p]) break;
        }
        p++;
        if (peeks(s, p)) {
            /* rest of the pattern follows: consume it */
            int len = strlen(s, p);

            while (len-- != 0) {
                if (nextc() == '\n') {
                    this.lineno++;
                    this.column = 0;
                }
            }
            return true;
        }
        else {
            /* false alarm: resume the search for the first character */
            p--;
        }
    }
}
1478
/*
 * Start a new token: if the token buffer was replaced by a larger one,
 * switch back to the built-in buffer buf (and its default size), then
 * reset the write index.  Returns column - 1.
 */
int newtok()
{
    bool using_grown_buffer = this.tokbuf != this.buf;

    if (using_grown_buffer) {
        this.tokbuf = this.buf;
        this.tsiz = MRB_PARSER_TOKBUF_SIZE;
    }
    this.tidx = 0;
    return this.column - 1;
}
1489
/*
 * Append one character to the token buffer.
 *
 *   c >= 0  a single byte, stored as is
 *   c <  0  a negated Unicode code point, stored as 1-4 UTF-8 bytes
 *
 * The buffer is doubled when it would become full.  Once its size has
 * reached MRB_PARSER_TOKBUF_MAX nothing is stored any more; only tidx
 * keeps growing so that tokfix() can detect the overflow.
 */
void tokadd(int c)
{
    Uint8Array utf8 = new Uint8Array(4);
    int len;

    /* mrb_assert(-0x10FFFF <= c && c <= 0xFF); */
    if (c >= 0) {
        /* Single byte from source or non-Unicode escape */
        utf8[0] = (byte)c;
        len = 1;
    }
    else {
        /* Unicode character */
        int cp = -c;

        if (cp < 0x80) {
            utf8[0] = (byte)cp;
            len = 1;
        }
        else if (cp < 0x800) {
            utf8[0] = (byte)(0xC0 | (cp >> 6));
            utf8[1] = (byte)(0x80 | (cp & 0x3F));
            len = 2;
        }
        else if (cp < 0x10000) {
            utf8[0] = (byte)(0xE0 | (cp >> 12));
            utf8[1] = (byte)(0x80 | ((cp >> 6) & 0x3F));
            utf8[2] = (byte)(0x80 | (cp & 0x3F));
            len = 3;
        }
        else {
            utf8[0] = (byte)(0xF0 | (cp >> 18));
            utf8[1] = (byte)(0x80 | ((cp >> 12) & 0x3F));
            utf8[2] = (byte)(0x80 | ((cp >> 6) & 0x3F));
            utf8[3] = (byte)(0x80 | (cp & 0x3F));
            len = 4;
        }
    }

    bool must_grow = this.tidx + len >= this.tsiz;
    if (must_grow) {
        if (this.tsiz >= MRB_PARSER_TOKBUF_MAX) {
            /* at the size limit: drop the bytes but remember the length */
            this.tidx += len;
            return;
        }
        this.tsiz *= 2;
        /* copy the current contents (built-in or grown buffer) over */
        var bigger = new Uint8Array(this.tsiz);
        bigger.Set(this.tokbuf, 0);
        this.tokbuf = bigger;
    }

    for (int k = 0; k < len; k++) {
        this.tokbuf[this.tidx++] = utf8[k];
    }
}
1548
/* Return the byte most recently written to the token buffer. */
int toklast()
{
    int last_index = this.tidx - 1;

    return this.tokbuf[last_index];
}
1553
/*
 * Terminate the current token with a NUL byte.  A token whose length
 * reached MRB_PARSER_TOKBUF_MAX is cut to MAX - 1 bytes and reported as
 * "string too long (truncated)".
 */
void tokfix()
{
    bool too_long = this.tidx >= MRB_PARSER_TOKBUF_MAX;

    if (too_long) {
        this.tidx = MRB_PARSER_TOKBUF_MAX - 1;
        yyError("string too long (truncated)");
    }
    this.tokbuf[this.tidx] = (byte)'\0';
}
1562
/* The buffer holding the bytes of the current token (not a copy). */
Uint8Array tok()
{
    Uint8Array current = this.tokbuf;

    return current;
}

/* Number of bytes written to the current token so far. */
int toklen()
{
    int length = this.tidx;

    return length;
}
1572
/*
 * ASCII-only character classification helpers (locale independent).
 * Every predicate is false for negative values such as the EOF markers.
 */
bool ISASCII(int c) { return c >= 0 && c <= 0x7f; }
bool ISPRINT(int c) { return c >= 0x20 && c <= 0x7e; }
/* space, or one of \t \n \v \f \r */
bool ISSPACE(int c) { return c == ' ' || (c >= '\t' && c <= '\r'); }
bool ISUPPER(int c) { return c >= 'A' && c <= 'Z'; }
bool ISLOWER(int c) { return c >= 'a' && c <= 'z'; }
bool ISALPHA(int c)
{
    int folded = c | 0x20; /* maps 'A'..'Z' onto 'a'..'z' */
    return folded >= 'a' && folded <= 'z';
}
bool ISDIGIT(int c) { return c >= '0' && c <= '9'; }
bool ISXDIGIT(int c)
{
    if (ISDIGIT(c)) return true;
    int folded = c | 0x20;
    return folded >= 'a' && folded <= 'f';
}
bool ISALNUM(int c) { return ISALPHA(c) || ISDIGIT(c); }
bool ISBLANK(int c) { return c == ' ' || c == '\t'; }
bool ISCNTRL(int c) { return (c >= 0 && c < 0x20) || c == 0x7f; }
/* ASCII case conversion; other values are returned unchanged */
int TOUPPER(int c)
{
    if (ISLOWER(c)) return c & 0x5f;
    return c;
}
int TOLOWER(int c)
{
    if (ISUPPER(c)) return c | 0x20;
    return c;
}
1586
/* Lexer-state predicates, all based on the current value of lstate. */

/* EXPR_ARG or EXPR_CMDARG */
bool IS_ARG()
{
    switch (this.lstate) {
    case mrb_lex_state_enum.EXPR_ARG:
    case mrb_lex_state_enum.EXPR_CMDARG:
        return true;
    default:
        return false;
    }
}

/* EXPR_END, EXPR_ENDARG or EXPR_ENDFN */
bool IS_END()
{
    switch (this.lstate) {
    case mrb_lex_state_enum.EXPR_END:
    case mrb_lex_state_enum.EXPR_ENDARG:
    case mrb_lex_state_enum.EXPR_ENDFN:
        return true;
    default:
        return false;
    }
}

/* EXPR_BEG, EXPR_MID, EXPR_VALUE or EXPR_CLASS */
bool IS_BEG()
{
    switch (this.lstate) {
    case mrb_lex_state_enum.EXPR_BEG:
    case mrb_lex_state_enum.EXPR_MID:
    case mrb_lex_state_enum.EXPR_VALUE:
    case mrb_lex_state_enum.EXPR_CLASS:
        return true;
    default:
        return false;
    }
}

/* In an argument state, whitespace was seen, and c itself is not whitespace. */
bool IS_SPCARG(int c, bool space_seen)
{
    if (!IS_ARG()) return false;
    return space_seen && !ISSPACE(c);
}

/* EXPR_BEG outside a command start, or any argument state. */
bool IS_LABEL_POSSIBLE(bool cmd_state)
{
    if (this.lstate == mrb_lex_state_enum.EXPR_BEG && !cmd_state) return true;
    return IS_ARG();
}

/* The character n positions ahead is ':' and the one after it is not. */
bool IS_LABEL_SUFFIX(int n)
{
    if (!peek_n(':', n)) return false;
    return !peek_n(':', n + 1);
}
1593
/*
 * Convert the leading octal digits of start (at most len of them) to a
 * number.  retlen receives how many digits were used.
 */
static int scan_oct(int[] start, int len, ref int retlen)
{
    int used = 0;
    int value = 0;

    /* mrb_assert(len <= 3) */
    for (; len != 0; len--) {
        int digit = start[used];

        if (digit < '0' || digit > '7') break;
        value = (value << 3) | (digit - '0');
        used++;
    }
    retlen = used;

    return value;
}
1608
/*
 * Convert the leading hexadecimal digits of start (at most len of them,
 * either case) to a number.  Scanning also stops at a zero entry.
 * retlen receives how many digits were used.
 */
static int scan_hex(int[] start, int len, ref int retlen)
{
    /* index & 15 is the digit value for both the lower- and upper-case half */
    Uint8Array digits = MrbParser.UTF8StringToArray("0123456789abcdef0123456789ABCDEF");
    int used = 0;
    int value = 0;

    /* mrb_assert(len <= 8) */
    for (; len != 0; len--) {
        if (start[used] == 0) break;

        int index = strchr(digits, 0, start[used]);
        if (index < 0) break;

        value = (value << 4) | (index & 15);
        used++;
    }
    retlen = used;

    return value;
}
1626
/*
 * Read the hexadecimal digits of a \u escape (the opening brace, if any,
 * has already been consumed by the caller) and return the code point.
 *
 * limit is the maximum number of digits to read (callers pass 4 or 8;
 * it must not exceed the 9-entry scratch buffer).  The first non-digit
 * is pushed back.
 *
 * Returns -1 after reporting an error when
 *   - the input ends before or inside the digits, or no digit is present
 *     ("Invalid escape character syntax");
 *   - the value is above 0x10FFFF or is a surrogate
 *     ("Invalid Unicode code point").
 */
int read_escape_unicode(int limit)
{
    int[] buf = new int[9];
    int i = 0;
    int c = 0;
    bool eof = false;

    buf[0] = nextc();
    if (buf[0] < 0) {
        eof = true;
    }
    else if (ISXDIGIT(buf[0])) {
        /* \uxxxx form */
        for (i = 1; i < limit; i++) {
            buf[i] = nextc();
            if (buf[i] < 0) {
                /* input ended inside the escape: an error, not a short value */
                eof = true;
                break;
            }
            if (!ISXDIGIT(buf[i])) {
                pushback(buf[i]);
                break;
            }
        }
    }
    else {
        pushback(buf[0]);
    }

    if (!eof) {
        c = scan_hex(buf, i, ref i);
    }
    if (eof || i == 0) {
        yyError("Invalid escape character syntax");
        return -1;
    }
    if (c < 0 || c > 0x10FFFF || (c & 0xFFFFF800) == 0xD800) {
        yyError("Invalid Unicode code point");
        return -1;
    }
    return c;
}
1666
/*
 * Read the part of an escape sequence that follows the backslash and
 * return the character it denotes.
 *
 * A \u escape is returned as the NEGATED code point so that tokadd() can
 * tell it apart from a raw byte.  Every other escape yields a value in
 * 0..255.  An unknown escape letter stands for itself.  On malformed
 * input an error is reported and 0 is returned.
 *
 * Note: C# has no octal literals, so the ESC and DEL codes are written
 * in hexadecimal (0x1b, 0x7f).
 */
int read_escape()
{
    int c;

    switch (c = nextc()) {
    case '\\':/* Backslash */
        return c;

    case 'n':/* newline */
        return '\n';

    case 't':/* horizontal tab */
        return '\t';

    case 'r':/* carriage-return */
        return '\r';

    case 'f':/* form-feed */
        return '\f';

    case 'v':/* vertical tab */
        return '\v';

    case 'a':/* alarm(bell) */
        return '\x07';

    case 'e':/* escape */
        return 0x1b;

    case '0':
    case '1':
    case '2':
    case '3': /* octal constant */
    case '4':
    case '5':
    case '6':
    case '7': {
            /* up to three octal digits, the first one being c */
            int[] buf = new int[3];
            int i;

            bool error = false;
            buf[0] = c;
            for (i = 1; i < 3; i++) {
                buf[i] = nextc();
                if (buf[i] < 0) {
                    error = true;
                    break;
                }
                if (buf[i] < '0' || '7' < buf[i]) {
                    pushback(buf[i]);
                    break;
                }
            }
            if (error)
                break;
            c = scan_oct(buf, i, ref i);
        }
        return c;

    case 'x': /* hex constant */
        {
            /* one or two hexadecimal digits */
            int[] buf = new int[2];
            int i;

            bool error = false;
            for (i = 0; i < 2; i++) {
                buf[i] = nextc();
                if (buf[i] < 0) {
                    error = true;
                    break;
                }
                if (!ISXDIGIT(buf[i])) {
                    pushback(buf[i]);
                    break;
                }
            }
            if (error)
                break;
            c = scan_hex(buf, i, ref i);
            if (i == 0) {
                yyError("Invalid escape character syntax");
                return 0;
            }
        }
        return c;

    case 'u': /* Unicode */
        if (peek('{')) {
            /* \u{xxxxxxxx} form */
            nextc();
            c = read_escape_unicode(8);
            if (c < 0) return 0;
            if (nextc() != '}') break;
        }
        else {
            c = read_escape_unicode(4);
            if (c < 0) return 0;
        }
        return -c;

    case 'b':/* backspace */
        return '\b';

    case 's':/* space */
        return ' ';

    case 'M': /* \M-x : set the high bit */
        if ((c = nextc()) != '-') {
            yyError("Invalid escape character syntax");
            pushback(c);
            return '\0';
        }
        if ((c = nextc()) == '\\') {
            return read_escape() | 0x80;
        }
        else if (c < 0) break;
        else {
            return ((c & 0xff) | 0x80);
        }

    case 'C': /* \C-x or \cx : control character */
    case 'c':
        if (c == 'C') {
            if ((c = nextc()) != '-') {
                yyError("Invalid escape character syntax");
                pushback(c);
                return '\0';
            }
        }
        if ((c = nextc()) == '\\') {
            c = read_escape();
        }
        else if (c == '?')
            return 0x7f; /* DEL */
        else if (c < 0) break;
        return c & 0x9f;

    case -1:
    case -2: /* end of a file */
        break;

    default:
        return c;
    }

    /* reached through "break": premature end of input or malformed escape */
    yyError("Invalid escape character syntax");
    return '\0';
}
1815
/*
 * Lex (part of) the string-like literal described by lex_strterm, which
 * is laid out as (type . (nest_level . (beg . end))):
 *   type        mrb_string_type flags of the literal
 *   nest_level  current nesting depth of paired delimiters
 *   beg / end   opening and closing delimiter (0 when absent)
 *
 * Depending on what is met first the function returns
 *   tHEREDOC_END / tHD_STRING_MID / tHD_STRING_PART   inside a heredoc
 *   tSTRING_PART            at "#{" in an expanding literal
 *   tLITERAL_DELIM / tHD_LITERAL_DELIM / tSTRING_MID  in %w-style arrays
 *   tXSTRING, tREGEXP, tLABEL_END or tSTRING          at the terminator
 *   0                       after reporting an error at end of input
 * and leaves the token text in yylval.nd where applicable.
 */
MrbTokens parse_string()
{
    int c;
    var type = (mrb_string_type)this.lex_strterm.car;
    var nest_level = (int)((node)this.lex_strterm.cdr).car;
    var beg = (int)((node)((node)this.lex_strterm.cdr).cdr).car;
    var end = (int)((node)((node)this.lex_strterm.cdr).cdr).cdr;
    var hinf = (type & mrb_string_type.STR_FUNC_HEREDOC) != 0 ? parsing_heredoc_inf() : null;
    var cmd_state = this.cmd_start;

    if (beg == 0) beg = -3; /* should never happen */
    if (end == 0) end = -3;
    newtok();
    while ((c = nextc()) != end || nest_level != 0) {
        if (hinf != null && (c == '\n' || c < 0)) {
            /* end of a heredoc line (or of the input) */
            bool line_head;
            tokadd('\n');
            tokfix();
            this.lineno++;
            this.column = 0;
            line_head = hinf.line_head;
            hinf.line_head = true;
            if (line_head) {
                /* check whether end of heredoc */
                Uint8Array s = tok();
                int p = 0;
                int len = toklen();
                if (hinf.allow_indent) {
                    while (ISSPACE(s[p]) && len > 0) {
                        ++p;
                        --len;
                    }
                }
                /* len - 1: the trailing '\n' is not part of the terminator */
                if ((len - 1 == hinf.term_len) && (strncmp(s, p, hinf.term, 0, len - 1) == 0)) {
                    if (c < 0) {
                        parsing_heredoc = null;
                    }
                    else {
                        return MrbTokens.tHEREDOC_END;
                    }
                }
            }
            if (c < 0) {
                yyError("can't find heredoc delimiter \"{0}\" anywhere before EOF", MrbParser.UTF8ArrayToString(hinf.term, 0));
                return 0;
            }
            yylval.nd = new_str(tok(), toklen());
            return MrbTokens.tHD_STRING_MID;
        }
        if (c < 0) {
            yyError("unterminated string meets end of file");
            return 0;
        }
        else if (c == beg) {
            nest_level++;
            ((node)this.lex_strterm.cdr).car = nest_level;
        }
        else if (c == end) {
            nest_level--;
            ((node)this.lex_strterm.cdr).car = nest_level;
        }
        else if (c == '\\') {
            c = nextc();
            if ((type & mrb_string_type.STR_FUNC_EXPAND) != 0) {
                if (c == end || c == beg) {
                    tokadd(c);
                }
                else if (c == '\n') {
                    /* backslash-newline: line continuation */
                    this.lineno++;
                    this.column = 0;
                    if ((type & mrb_string_type.STR_FUNC_ARRAY) != 0) {
                        tokadd('\n');
                    }
                }
                else if ((type & mrb_string_type.STR_FUNC_REGEXP) != 0) {
                    /* regexp escapes are kept verbatim */
                    tokadd('\\');
                    tokadd(c);
                }
                else if (c == 'u' && peek('{')) {
                    /* \u{xxxx xxxx xxxx} form */
                    nextc();
                    while (true) {
                        do c = nextc(); while (ISSPACE(c));
                        if (c == '}') break;
                        pushback(c);
                        c = read_escape_unicode(8);
                        if (c < 0) break;
                        tokadd(-c);
                    }
                    if (hinf != null)
                        hinf.line_head = false;
                }
                else {
                    pushback(c);
                    tokadd(read_escape());
                    if (hinf != null)
                        hinf.line_head = false;
                }
            }
            else {
                /* non-expanding literal: only delimiters (and array blanks) are unescaped */
                if (c != beg && c != end) {
                    if (c == '\n') {
                        this.lineno++;
                        this.column = 0;
                    }
                    if (!(c == '\\' || ((type & mrb_string_type.STR_FUNC_ARRAY) != 0 && ISSPACE(c)))) {
                        tokadd('\\');
                    }
                }
                tokadd(c);
            }
            continue;
        }
        else if ((c == '#') && (type & mrb_string_type.STR_FUNC_EXPAND) != 0) {
            c = nextc();
            if (c == '{') {
                /* start of "#{...}" interpolation: hand back the text so far */
                tokfix();
                this.lstate = mrb_lex_state_enum.EXPR_BEG;
                this.cmd_start = true;
                yylval.nd = new_str(tok(), toklen());
                if (hinf != null) {
                    hinf.line_head = false;
                    return MrbTokens.tHD_STRING_PART;
                }
                return MrbTokens.tSTRING_PART;
            }
            tokadd('#');
            pushback(c);
            continue;
        }
        if ((type & mrb_string_type.STR_FUNC_ARRAY) != 0 && ISSPACE(c)) {
            if (toklen() == 0) {
                /* whitespace between array elements */
                do {
                    if (c == '\n') {
                        this.lineno++;
                        this.column = 0;
                        heredoc_treat_nextline();
                        if (this.parsing_heredoc != null) {
                            return MrbTokens.tHD_LITERAL_DELIM;
                        }
                    }
                    c = nextc();
                } while (ISSPACE(c));
                pushback(c);
                return MrbTokens.tLITERAL_DELIM;
            }
            else {
                /* whitespace ends the current array element */
                pushback(c);
                tokfix();
                yylval.nd = new_str(tok(), toklen());
                return MrbTokens.tSTRING_MID;
            }
        }
        tokadd(c);
    }

    /* closing delimiter reached */
    tokfix();
    this.lstate = mrb_lex_state_enum.EXPR_END;
    end_strterm();

    if ((type & mrb_string_type.STR_FUNC_XQUOTE) != 0) {
        yylval.nd = new_xstr(tok(), toklen());
        return MrbTokens.tXSTRING;
    }

    if ((type & mrb_string_type.STR_FUNC_REGEXP) != 0) {
        int f = 0;
        int re_opt;
        Uint8Array s = strndup(tok(), 0, toklen());
        Uint8Array flags = new Uint8Array(3);
        int flag = 0;
        var enc = (byte)'\0';
        Uint8Array encp;
        Uint8Array dup;

        /* collect the option letters that follow the regexp; unknown ones go to the token buffer */
        newtok();
        while ((re_opt = nextc()) >= 0 && ISALPHA(re_opt)) {
            switch (re_opt) {
            case 'i': f |= 1; break;
            case 'x': f |= 2; break;
            case 'm': f |= 4; break;
            case 'u': f |= 16; break;
            case 'n': f |= 32; break;
            default: tokadd(re_opt); break;
            }
        }
        pushback(re_opt);
        if (toklen() != 0) {
            tokfix();
            yyError("unknown regexp option{0} - {1}", toklen() > 1 ? "s" : "", MrbParser.UTF8ArrayToString(tok().SubArray(0, toklen() + 1), 0));
        }
        if (f != 0) {
            if ((f & 1) != 0) flags[flag++] = (byte)'i';
            if ((f & 2) != 0) flags[flag++] = (byte)'x';
            if ((f & 4) != 0) flags[flag++] = (byte)'m';
            if ((f & 16) != 0) enc = (byte)'u';
            if ((f & 32) != 0) enc = (byte)'n';
        }
        if (flag > 0) {
            dup = strndup(flags, 0, flag);
        }
        else {
            dup = null;
        }
        if (enc != 0) {
            encp = strndup(new Uint8Array(new byte[] { enc }), 0, 1);
        }
        else {
            encp = null;
        }
        yylval.nd = new_regx(s, dup, encp);

        return MrbTokens.tREGEXP;
    }
    yylval.nd = new_str(tok(), toklen());
    if (IS_LABEL_POSSIBLE(cmd_state)) {
        /* "str": used as a hash label */
        if (IS_LABEL_SUFFIX(0)) {
            this.lstate = mrb_lex_state_enum.EXPR_BEG;
            nextc();
            return MrbTokens.tLABEL_END;
        }
    }
    return MrbTokens.tSTRING;
}
2040
/*
 * Lex the identifier that follows "<<" and, if it introduces a heredoc,
 * queue that heredoc in heredocs_from_nextline.
 *
 * Accepted forms: an optional '-' (the terminator may be indented)
 * followed by a bare identifier, a "double quoted" or a 'single quoted'
 * terminator.  Only the single-quoted form disables expansion.
 *
 * Returns tHEREDOC_BEG with the new heredoc node in yylval.nd, or 0 when
 * the input is not a heredoc start (the consumed characters are pushed
 * back where possible) or the quoted identifier is unterminated.
 */
MrbTokens heredoc_identifier()
{
    bool allow_indent = false;
    bool single_quoted = false;
    int c = nextc();

    if (ISSPACE(c) || c == '=') {
        pushback(c);
        return 0;
    }
    if (c == '-') {
        allow_indent = true;
        c = nextc();
    }

    if (c == '\'' || c == '"') {
        /* quoted terminator: must close on the same line */
        int closer = c;

        single_quoted = (c == '\'');
        newtok();
        for (;;) {
            c = nextc();
            if (c < 0 || c == closer) break;
            if (c == '\n') {
                c = -1;
                break;
            }
            tokadd(c);
        }
        if (c < 0) {
            yyError("unterminated here document identifier");
            return 0;
        }
    }
    else {
        if (c < 0) {
            return 0; /* missing here document identifier */
        }
        if (!identchar(c)) {
            pushback(c);
            if (allow_indent) pushback('-');
            return 0;
        }
        newtok();
        do {
            tokadd(c);
        } while ((c = nextc()) >= 0 && identchar(c));
        pushback(c);
    }
    tokfix();

    /* describe the heredoc and schedule it for the next line */
    heredoc_node newnode = new_heredoc();
    parser_heredoc_info info = newnode.info;
    mrb_string_type type = mrb_string_type.str_heredoc;

    info.term = strndup(tok(), 0, toklen());
    info.term_len = toklen();
    if (!single_quoted)
        type |= mrb_string_type.STR_FUNC_EXPAND;
    info.type = type;
    info.allow_indent = allow_indent;
    info.line_head = true;
    info.claer_doc();
    this.heredocs_from_nextline = push(this.heredocs_from_nextline, newnode);
    this.lstate = mrb_lex_state_enum.EXPR_END;

    yylval.nd = newnode;
    return MrbTokens.tHEREDOC_BEG;
}
2108
/*
 * Lex a numeric literal whose first character (a digit or a sign) is c.
 *
 * Literals starting with '0' are handled here: 0x.. (hex), 0b..
 * (binary), 0d.. (decimal), 0o.. or 0.. (octal) and plain 0.  A single
 * '_' is allowed between digits.  Everything else - decimal integers and
 * floats - is delegated to invalid_octal().
 *
 * Returns tINTEGER (or whatever invalid_octal()/trailing_uc() return)
 * with the value node in yylval.nd, or 0 after an error.
 */
MrbTokens start_num(int c)
{
    int nondigit = 0; /* last non-digit seen ('_'), 0 when the last char was a digit */

    this.lstate = mrb_lex_state_enum.EXPR_END;
    newtok();
    if (c == '-' || c == '+') {
        tokadd(c);
        c = nextc();
    }
    if (c != '0') {
        return invalid_octal(c, nondigit);
    }

    int start = toklen(); /* token length before any digit is added */
    c = nextc();

    if (c == 'x' || c == 'X') {
        /* hexadecimal */
        c = nextc();
        if (c >= 0 && ISXDIGIT(c)) {
            do {
                if (c == '_') {
                    if (nondigit != 0) break;
                    nondigit = c;
                    continue;
                }
                if (!ISXDIGIT(c)) break;
                nondigit = 0;
                tokadd(tolower(c));
            } while ((c = nextc()) >= 0);
        }
        pushback(c);
        tokfix();
        if (toklen() == start) {
            yyError("numeric literal without digits");
            return 0;
        }
        if (nondigit != 0) return trailing_uc(nondigit, 0);
        yylval.nd = new_int(tok(), 16);
        return MrbTokens.tINTEGER;
    }

    if (c == 'b' || c == 'B') {
        /* binary */
        c = nextc();
        if (c == '0' || c == '1') {
            do {
                if (c == '_') {
                    if (nondigit != 0) break;
                    nondigit = c;
                    continue;
                }
                if (c != '0' && c != '1') break;
                nondigit = 0;
                tokadd(c);
            } while ((c = nextc()) >= 0);
        }
        pushback(c);
        tokfix();
        if (toklen() == start) {
            yyError("numeric literal without digits");
            return 0;
        }
        if (nondigit != 0) return trailing_uc(nondigit, 0);
        yylval.nd = new_int(tok(), 2);
        return MrbTokens.tINTEGER;
    }

    if (c == 'd' || c == 'D') {
        /* decimal */
        c = nextc();
        if (c >= 0 && ISDIGIT(c)) {
            do {
                if (c == '_') {
                    if (nondigit != 0) break;
                    nondigit = c;
                    continue;
                }
                if (!ISDIGIT(c)) break;
                nondigit = 0;
                tokadd(c);
            } while ((c = nextc()) >= 0);
        }
        pushback(c);
        tokfix();
        if (toklen() == start) {
            yyError("numeric literal without digits");
            return 0;
        }
        if (nondigit != 0) return trailing_uc(nondigit, 0);
        yylval.nd = new_int(tok(), 10);
        return MrbTokens.tINTEGER;
    }

    if (c == 'o' || c == 'O') {
        /* prefixed octal */
        c = nextc();
        if (c < 0 || c == '_' || !ISDIGIT(c)) {
            yyError("numeric literal without digits");
            return 0;
        }
    }

    bool octal_start = (c == '_') /* 0_0 */ || (c >= '0' && c <= '7') /* octal */;
    if (octal_start) {
        do {
            if (c == '_') {
                if (nondigit != 0) break;
                nondigit = c;
                continue;
            }
            if (c < '0' || c > '9') break;
            if (c > '7') {
                yyError("Invalid octal digit");
                return invalid_octal(c, nondigit);
            }
            nondigit = 0;
            tokadd(c);
        } while ((c = nextc()) >= 0);

        if (toklen() > start) {
            pushback(c);
            tokfix();
            if (nondigit != 0) return trailing_uc(nondigit, 0);
            yylval.nd = new_int(tok(), 8);
            return MrbTokens.tINTEGER;
        }
        if (nondigit != 0) {
            pushback(c);
            return trailing_uc(nondigit, 0);
        }
    }

    if (c > '7' && c <= '9') {
        yyError("Invalid octal digit");
        return invalid_octal(c, nondigit);
    }
    if (c == '.' || c == 'e' || c == 'E') {
        /* "0." / "0e": a float with a zero integer part */
        tokadd('0');
        return invalid_octal(c, nondigit);
    }

    /* a lone zero */
    pushback(c);
    yylval.nd = new_int("0", 10);
    return MrbTokens.tINTEGER;
}
2253
/*
 * Lex the remainder of a decimal integer or float, starting with the
 * already-read character c.  Digits are appended to the token; one '.'
 * followed by a digit and one exponent ('e'/'E' with optional sign) turn
 * the literal into a float; a single '_' between digits is skipped.
 *
 * nondigit carries the last non-digit character seen (0 if none) so that
 * trailing_uc() can complain about literals ending in '_', 'e', '+'...
 * The first character that does not belong to the number is pushed back
 * and the token is finished by trailing_uc().
 */
MrbTokens invalid_octal(int c, int nondigit)
{
    int is_float = 0;
    int seen_point = 0;
    int seen_e = 0;

    for (;;) {
        if (c >= '0' && c <= '9') {
            nondigit = 0;
            tokadd(c);
        }
        else if (c == '.') {
            if (nondigit != 0) return trailing_uc(nondigit, is_float);
            if (seen_point != 0 || seen_e != 0) {
                pushback(c);
                return trailing_uc(nondigit, is_float);
            }

            /* a '.' only belongs to the number when a digit follows */
            int c0 = nextc();
            if (c0 < 0 || !ISDIGIT(c0)) {
                pushback(c0);
                pushback(c);
                return trailing_uc(nondigit, is_float);
            }
            c = c0;
            tokadd('.');
            tokadd(c);
            is_float++;
            seen_point++;
            nondigit = 0;
        }
        else if (c == 'e' || c == 'E') {
            if (nondigit != 0) {
                pushback(c);
                c = nondigit;
                pushback(c);
                return trailing_uc(nondigit, is_float);
            }
            if (seen_e != 0) {
                pushback(c);
                return trailing_uc(nondigit, is_float);
            }
            tokadd(c);
            seen_e++;
            is_float++;
            nondigit = c;
            c = nextc();
            /* no sign: examine this character on the next round */
            if (c != '-' && c != '+') continue;
            tokadd(c);
            nondigit = c;
        }
        else if (c == '_') {
            /* '_' in number just ignored */
            if (nondigit != 0) {
                pushback(c);
                return trailing_uc(nondigit, is_float);
            }
            nondigit = c;
        }
        else {
            pushback(c);
            return trailing_uc(nondigit, is_float);
        }
        c = nextc();
    }
}
2336
/*
 * Finish a numeric token.
 *
 * nondigit  last non-digit character of the literal (for example '_'),
 *           0 if the literal ended with a digit; a non-zero value is
 *           reported as "trailing 'X' in number"
 * is_float  non-zero when the token is a float
 *
 * Returns tFLOAT or tINTEGER with the value node stored in yylval.nd.
 * Floats that cannot be read or are out of range only produce a warning.
 */
MrbTokens trailing_uc(int nondigit, int is_float)
{
    if (nondigit != 0) {
        /* nondigit is a character code: print the character, not the number */
        yyError("trailing '{0}' in number", ((char)nondigit).ToString());
    }

    tokfix();
    if (is_float != 0) {
        double d;
        Uint8Array endp;

        errno = 0;
        d = mrb_float_read(tok(), 0, out endp);
        if (d == 0 && endp == tok()) {
            yyWarning("corrupted float value {0}", MrbParser.UTF8ArrayToString(tok().SubArray(0, toklen() + 1), 0));
        }
        else if (errno == ERANGE) {
            yyWarning("float {0} out of range", MrbParser.UTF8ArrayToString(tok().SubArray(0, toklen() + 1), 0));
            errno = 0;
        }
        yylval.nd = new_float(tok());
        return MrbTokens.tFLOAT;
    }
    yylval.nd = new_int(tok(), 10);
    return MrbTokens.tINTEGER;
}
2363
/* Warn about an ambiguous first argument.  Always returns true. */
bool arg_ambiguous()
{
    const bool reported = true;

    yyWarning("ambiguous first argument; put parentheses or even spaces");
    return reported;
}
2369
/*
 * Lex the introducer of a %-literal; c is the character that follows
 * the '%'.
 *
 * If c is not alphanumeric it is the delimiter itself and the literal is
 * treated as %Q.  Otherwise c selects the kind (Q q W w x r s I i) and
 * the next character is the delimiter.  For the bracket delimiters
 * ( [ { < the matching closer becomes the terminator and the opener is
 * remembered so that nesting can be tracked.
 *
 * Installs the new string terminator in lex_strterm and returns the
 * matching *_BEG token (%q is lexed to the end right away through
 * parse_string()).  Returns 0 after reporting an error.
 */
MrbTokens quotation(int c)
{
    int term;
    int paren;

    if (c < 0 || !ISALNUM(c)) {
        term = c;
        c = 'Q';
    }
    else {
        term = nextc();
        if (isalnum(term)) {
            yyError("unknown type of %string");
            return 0;
        }
    }
    if (c < 0 || term < 0) {
        yyError("unterminated quoted string meets end of file");
        return 0;
    }

    /* paired delimiters close with their counterpart; others close with themselves */
    paren = term;
    switch (term) {
    case '(': term = ')'; break;
    case '[': term = ']'; break;
    case '{': term = '}'; break;
    case '<': term = '>'; break;
    default: paren = 0; break;
    }

    mrb_string_type kind;
    MrbTokens start_token;

    switch (c) {
    case 'Q':
        kind = mrb_string_type.str_dquote;
        start_token = MrbTokens.tSTRING_BEG;
        break;

    case 'q':
        /* non-expanding string: lexed completely below */
        this.lex_strterm = new_strterm(mrb_string_type.str_squote, term, paren);
        return parse_string();

    case 'W':
        kind = mrb_string_type.str_dword;
        start_token = MrbTokens.tWORDS_BEG;
        break;

    case 'w':
        kind = mrb_string_type.str_sword;
        start_token = MrbTokens.tWORDS_BEG;
        break;

    case 'x':
        kind = mrb_string_type.str_xquote;
        start_token = MrbTokens.tXSTRING_BEG;
        break;

    case 'r':
        kind = mrb_string_type.str_regexp;
        start_token = MrbTokens.tREGEXP_BEG;
        break;

    case 's':
        kind = mrb_string_type.str_ssym;
        start_token = MrbTokens.tSYMBEG;
        break;

    case 'I':
        kind = mrb_string_type.str_dsymbols;
        start_token = MrbTokens.tSYMBOLS_BEG;
        break;

    case 'i':
        kind = mrb_string_type.str_ssymbols;
        start_token = MrbTokens.tSYMBOLS_BEG;
        break;

    default:
        yyError("unknown type of %string");
        return 0;
    }

    this.lex_strterm = new_strterm(kind, term, paren);
    return start_token;
}
2439
/*
 * Reserved words of the language, keyed by their spelling.  Each entry
 * gives the token returned in the normal position (id0), the token for
 * the modifier position (id1; same as id0 for words without a modifier
 * form) and the lexer state to enter after the word.
 */
static readonly Dictionary<string, kwtable> wordlist = build_wordlist();

/* Create the reserved-word table; entries are added in the order listed. */
static Dictionary<string, kwtable> build_wordlist()
{
    var table = new Dictionary<string, kwtable>();

    add_keyword(table, "break", MrbTokens.keyword_break, MrbTokens.keyword_break, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "else", MrbTokens.keyword_else, MrbTokens.keyword_else, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "nil", MrbTokens.keyword_nil, MrbTokens.keyword_nil, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "ensure", MrbTokens.keyword_ensure, MrbTokens.keyword_ensure, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "end", MrbTokens.keyword_end, MrbTokens.keyword_end, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "then", MrbTokens.keyword_then, MrbTokens.keyword_then, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "not", MrbTokens.keyword_not, MrbTokens.keyword_not, mrb_lex_state_enum.EXPR_ARG);
    add_keyword(table, "false", MrbTokens.keyword_false, MrbTokens.keyword_false, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "self", MrbTokens.keyword_self, MrbTokens.keyword_self, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "elsif", MrbTokens.keyword_elsif, MrbTokens.keyword_elsif, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "rescue", MrbTokens.keyword_rescue, MrbTokens.modifier_rescue, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "true", MrbTokens.keyword_true, MrbTokens.keyword_true, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "until", MrbTokens.keyword_until, MrbTokens.modifier_until, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "unless", MrbTokens.keyword_unless, MrbTokens.modifier_unless, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "return", MrbTokens.keyword_return, MrbTokens.keyword_return, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "def", MrbTokens.keyword_def, MrbTokens.keyword_def, mrb_lex_state_enum.EXPR_FNAME);
    add_keyword(table, "and", MrbTokens.keyword_and, MrbTokens.keyword_and, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "do", MrbTokens.keyword_do, MrbTokens.keyword_do, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "yield", MrbTokens.keyword_yield, MrbTokens.keyword_yield, mrb_lex_state_enum.EXPR_ARG);
    add_keyword(table, "for", MrbTokens.keyword_for, MrbTokens.keyword_for, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "undef", MrbTokens.keyword_undef, MrbTokens.keyword_undef, mrb_lex_state_enum.EXPR_FNAME);
    add_keyword(table, "or", MrbTokens.keyword_or, MrbTokens.keyword_or, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "in", MrbTokens.keyword_in, MrbTokens.keyword_in, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "when", MrbTokens.keyword_when, MrbTokens.keyword_when, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "retry", MrbTokens.keyword_retry, MrbTokens.keyword_retry, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "if", MrbTokens.keyword_if, MrbTokens.modifier_if, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "case", MrbTokens.keyword_case, MrbTokens.keyword_case, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "redo", MrbTokens.keyword_redo, MrbTokens.keyword_redo, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "next", MrbTokens.keyword_next, MrbTokens.keyword_next, mrb_lex_state_enum.EXPR_MID);
    add_keyword(table, "super", MrbTokens.keyword_super, MrbTokens.keyword_super, mrb_lex_state_enum.EXPR_ARG);
    add_keyword(table, "module", MrbTokens.keyword_module, MrbTokens.keyword_module, mrb_lex_state_enum.EXPR_VALUE);
    add_keyword(table, "begin", MrbTokens.keyword_begin, MrbTokens.keyword_begin, mrb_lex_state_enum.EXPR_BEG);
    add_keyword(table, "__LINE__", MrbTokens.keyword__LINE__, MrbTokens.keyword__LINE__, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "__FILE__", MrbTokens.keyword__FILE__, MrbTokens.keyword__FILE__, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "__ENCODING__", MrbTokens.keyword__ENCODING__, MrbTokens.keyword__ENCODING__, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "END", MrbTokens.keyword_END, MrbTokens.keyword_END, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "alias", MrbTokens.keyword_alias, MrbTokens.keyword_alias, mrb_lex_state_enum.EXPR_FNAME);
    add_keyword(table, "BEGIN", MrbTokens.keyword_BEGIN, MrbTokens.keyword_BEGIN, mrb_lex_state_enum.EXPR_END);
    add_keyword(table, "class", MrbTokens.keyword_class, MrbTokens.keyword_class, mrb_lex_state_enum.EXPR_CLASS);
    add_keyword(table, "while", MrbTokens.keyword_while, MrbTokens.modifier_while, mrb_lex_state_enum.EXPR_VALUE);

    return table;
}

/* Register one reserved word under its own spelling. */
static void add_keyword(Dictionary<string, kwtable> table, string name, MrbTokens id0, MrbTokens id1, mrb_lex_state_enum state)
{
    table.Add(name, new kwtable(name, id0, id1, state));
}
2483
/*
 * Look up a token in the reserved-word table.  The text is taken from
 * str.SubArray(0, len + 1) and converted to a string before the lookup.
 * Returns the matching entry, or null when the text is not a reserved
 * word.
 */
kwtable mrb_reserved_word(Uint8Array str, int len)
{
    kwtable entry;
    string word = MrbParser.UTF8ArrayToString(str.SubArray(0, len + 1), 0);

    return wordlist.TryGetValue(word, out entry) ? entry : null;
}
2495
/// <summary>
/// Scans the next token from the input (C# port of the mruby parse.y lexer).
/// The token's semantic value, when it has one, is stored in <c>yylval</c>
/// and the lexer state (<c>lstate</c>, <c>cmd_start</c>, nesting counters)
/// is updated for the following call.
/// </summary>
/// <returns>
/// The token kind. Single-character tokens are returned as their character
/// code cast to <c>MrbTokens</c>. Returns 0 at end of script and after a
/// reported lexical error, and -1 when a line consisting of
/// <c>__END__</c> is reached.
/// </returns>
MrbTokens parser_yylex()
{
	int c;
	bool space_seen = false;
	bool cmd_state;
	mrb_lex_state_enum last_state;
	int token_column = 0;

	/* a string/heredoc body is pending: let the string scanner continue */
	if (this.lex_strterm != null) {
		if (is_strterm_type(mrb_string_type.STR_FUNC_HEREDOC)) {
			if (this.parsing_heredoc != null)
				return parse_string();
		}
		else
			return parse_string();
	}
	cmd_state = this.cmd_start;
	this.cmd_start = false;
	/* loops until a token is returned, or until an identifier start is
	   found (the 'break' after the switch leaves the loop) */
	for (;;) {
		last_state = this.lstate;
		switch (c = nextc()) {
		/* white spaces */
		case ' ':
		case '\t':
		case '\f':
		case '\r':
		case '\v':
			space_seen = true;
			continue;

		case '\x04': /* ^D */
		case '\x1a': /* ^Z */
		case '\0': /* NUL */
		case -1: /* end of script. */
		case '#': /* it's a comment */
		case -2: /* end of a file */
		case '\n':
			if (c == '#') {
				skip('\n');
			}
			else if ((c != -2) && (c != '\n')) {
				if (this.heredocs_from_nextline == null)
					return 0;
			}
			heredoc_treat_nextline();
			switch (this.lstate) {
			case mrb_lex_state_enum.EXPR_BEG:
			case mrb_lex_state_enum.EXPR_FNAME:
			case mrb_lex_state_enum.EXPR_DOT:
			case mrb_lex_state_enum.EXPR_CLASS:
			case mrb_lex_state_enum.EXPR_VALUE:
				/* in these states the newline does not end a statement */
				this.lineno++;
				this.column = 0;
				if (this.parsing_heredoc != null) {
					if (this.lex_strterm != null) {
						return parse_string();
					}
				}
				continue;
			default:
				break;
			}
			if (this.parsing_heredoc != null) {
				return (MrbTokens)'\n';
			}
			/* look past leading blanks on the next line: a single leading
			   '.' (method chain) makes the newline insignificant */
			bool retry = false;
			while ((c = nextc()) != 0) {
				switch (c) {
				case ' ':
				case '\t':
				case '\f':
				case '\r':
				case '\v':
					space_seen = true;
					continue;
				case '.':
					if ((c = nextc()) != '.') {
						pushback(c);
						pushback('.');
						retry = true;
					}
					break;
				case -1: /* EOF */
				case -2: /* end of a file */
					break;
				default:
					pushback(c);
					break;
				}
				break;
			}
			if (retry)
				continue;
			this.cmd_start = true;
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			return (MrbTokens)'\n';

		case '*':
			if ((c = nextc()) == '*') {
				if ((c = nextc()) == '=') {
					yylval.id = intern("**", 2);
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tOP_ASGN;
				}
				pushback(c);
				c = (int)MrbTokens.tPOW;
			}
			else {
				if (c == '=') {
					yylval.id = intern_c('*');
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tOP_ASGN;
				}
				pushback(c);
				if (IS_SPCARG(c, space_seen)) {
					yyWarning("'*' interpreted as argument prefix");
					c = (int)MrbTokens.tSTAR;
				}
				else if (IS_BEG()) {
					c = (int)MrbTokens.tSTAR;
				}
				else {
					c = '*';
				}
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			return (MrbTokens)c;

		case '!':
			c = nextc();
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
				if (c == '@') {
					return (MrbTokens)'!';
				}
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			if (c == '=') {
				return MrbTokens.tNEQ;
			}
			if (c == '~') {
				return MrbTokens.tNMATCH;
			}
			pushback(c);
			return (MrbTokens)'!';

		case '=':
			if (this.column == 1) {
				/* '=' in the first column may open an embedded document,
				   which is skipped up to its terminator */
				if (peeks(begin, 0)) {
					c = peekc_n(begin.Length - 1);
					if (c < 0 || ISSPACE(c)) {
						do {
							if (!skips(end, 0)) {
								yyError("embedded document meets end of file");
								return 0;
							}
							c = nextc();
						} while (!(c < 0 || ISSPACE(c)));
						if (c != '\n') skip('\n');
						this.lineno++;
						this.column = 0;
						continue;
					}
				}
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			if ((c = nextc()) == '=') {
				if ((c = nextc()) == '=') {
					return MrbTokens.tEQQ;
				}
				pushback(c);
				return MrbTokens.tEQ;
			}
			if (c == '~') {
				return MrbTokens.tMATCH;
			}
			else if (c == '>') {
				return MrbTokens.tASSOC;
			}
			pushback(c);
			return (MrbTokens)'=';

		case '<':
			c = nextc();
			if (c == '<' &&
				this.lstate != mrb_lex_state_enum.EXPR_DOT &&
				this.lstate != mrb_lex_state_enum.EXPR_CLASS &&
				!IS_END() &&
				(!IS_ARG() || space_seen)) {
				MrbTokens token = heredoc_identifier();
				if (token != 0)
					return token;
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				/* NOTE(review): lstate was just set to EXPR_BEG, so this
				   condition can never hold; kept unchanged so the lexer's
				   behavior is not altered - confirm against upstream. */
				if (this.lstate == mrb_lex_state_enum.EXPR_CLASS) {
					this.cmd_start = true;
				}
			}
			if (c == '=') {
				if ((c = nextc()) == '>') {
					return MrbTokens.tCMP;
				}
				pushback(c);
				return MrbTokens.tLEQ;
			}
			if (c == '<') {
				if ((c = nextc()) == '=') {
					yylval.id = intern("<<", 2);
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tOP_ASGN;
				}
				pushback(c);
				return MrbTokens.tLSHFT;
			}
			pushback(c);
			return (MrbTokens)'<';

		case '>':
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			if ((c = nextc()) == '=') {
				return MrbTokens.tGEQ;
			}
			if (c == '>') {
				if ((c = nextc()) == '=') {
					yylval.id = intern(">>", 2);
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tOP_ASGN;
				}
				pushback(c);
				return MrbTokens.tRSHFT;
			}
			pushback(c);
			return (MrbTokens)'>';

		case '"':
			this.lex_strterm = new_strterm(mrb_string_type.str_dquote, '"', 0);
			return MrbTokens.tSTRING_BEG;

		case '\'':
			this.lex_strterm = new_strterm(mrb_string_type.str_squote, '\'', 0);
			return parse_string();

		case '`':
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME) {
				this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
				return (MrbTokens)'`';
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				if (cmd_state)
					this.lstate = mrb_lex_state_enum.EXPR_CMDARG;
				else
					this.lstate = mrb_lex_state_enum.EXPR_ARG;
				return (MrbTokens)'`';
			}
			this.lex_strterm = new_strterm(mrb_string_type.str_xquote, '`', 0);
			return MrbTokens.tXSTRING_BEG;

		case '?':
			if (IS_END()) {
				this.lstate = mrb_lex_state_enum.EXPR_VALUE;
				return (MrbTokens)'?';
			}
			c = nextc();
			if (c < 0) {
				yyError("incomplete character syntax");
				return 0;
			}
			if (ISSPACE(c)) {
				if (!IS_ARG()) {
					int c2;
					switch (c) {
					case ' ':
						c2 = 's';
						break;
					case '\n':
						c2 = 'n';
						break;
					case '\t':
						c2 = 't';
						break;
					case '\v':
						c2 = 'v';
						break;
					case '\r':
						c2 = 'r';
						break;
					case '\f':
						c2 = 'f';
						break;
					default:
						c2 = 0;
						break;
					}
					if (c2 != 0) {
						/* c2 is an int: format it as a character so the hint
						   reads "?\s" rather than the numeric code "?\115" */
						yyError(String.Format("invalid character syntax; use ?\\{0}", ((char)c2).ToString()));
					}
				}

				pushback(c);
				this.lstate = mrb_lex_state_enum.EXPR_VALUE;
				return (MrbTokens)'?';
			}
			newtok();
			/* need support UTF-8 if configured */
			if ((isalnum(c) || c == '_')) {
				int c2 = nextc();
				pushback(c2);
				if ((isalnum(c2) || c2 == '_')) {
					/* '?' followed by a multi-character word: plain '?' */
					pushback(c);
					this.lstate = mrb_lex_state_enum.EXPR_VALUE;
					return (MrbTokens)'?';
				}
			}
			if (c == '\\') {
				c = read_escape();
				tokadd(c);
			}
			else {
				tokadd(c);
			}
			tokfix();
			yylval.nd = new_str(tok(), toklen());
			this.lstate = mrb_lex_state_enum.EXPR_END;
			return MrbTokens.tCHAR;

		case '&':
			if ((c = nextc()) == '&') {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				if ((c = nextc()) == '=') {
					yylval.id = intern("&&", 2);
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tOP_ASGN;
				}
				pushback(c);
				return MrbTokens.tANDOP;
			}
			else if (c == '.') {
				this.lstate = mrb_lex_state_enum.EXPR_DOT;
				return MrbTokens.tANDDOT;
			}
			else if (c == '=') {
				yylval.id = intern_c('&');
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				return MrbTokens.tOP_ASGN;
			}
			pushback(c);
			if (IS_SPCARG(c, space_seen)) {
				yyWarning("'&' interpreted as argument prefix");
				c = (int)MrbTokens.tAMPER;
			}
			else if (IS_BEG()) {
				c = (int)MrbTokens.tAMPER;
			}
			else {
				c = '&';
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			return (MrbTokens)c;

		case '|':
			if ((c = nextc()) == '|') {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				if ((c = nextc()) == '=') {
					yylval.id = intern("||", 2);
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tOP_ASGN;
				}
				pushback(c);
				return MrbTokens.tOROP;
			}
			if (c == '=') {
				yylval.id = intern_c('|');
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				return MrbTokens.tOP_ASGN;
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			pushback(c);
			return (MrbTokens)'|';

		case '+':
			c = nextc();
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
				if (c == '@') {
					return MrbTokens.tUPLUS;
				}
				pushback(c);
				return (MrbTokens)'+';
			}
			if (c == '=') {
				yylval.id = intern_c('+');
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				return MrbTokens.tOP_ASGN;
			}
			if (IS_BEG() || (IS_SPCARG(c, space_seen) && arg_ambiguous())) {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				pushback(c);
				if (c >= 0 && ISDIGIT(c)) {
					c = '+';
					return start_num(c);
				}
				return MrbTokens.tUPLUS;
			}
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			pushback(c);
			return (MrbTokens)'+';

		case '-':
			c = nextc();
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
				if (c == '@') {
					return MrbTokens.tUMINUS;
				}
				pushback(c);
				return (MrbTokens)'-';
			}
			if (c == '=') {
				yylval.id = intern_c('-');
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				return MrbTokens.tOP_ASGN;
			}
			if (c == '>') {
				this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
				return MrbTokens.tLAMBDA;
			}
			if (IS_BEG() || (IS_SPCARG(c, space_seen) && arg_ambiguous())) {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				pushback(c);
				if (c >= 0 && ISDIGIT(c)) {
					return MrbTokens.tUMINUS_NUM;
				}
				return MrbTokens.tUMINUS;
			}
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			pushback(c);
			return (MrbTokens)'-';

		case '.':
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			if ((c = nextc()) == '.') {
				if ((c = nextc()) == '.') {
					return MrbTokens.tDOT3;
				}
				pushback(c);
				return MrbTokens.tDOT2;
			}
			pushback(c);
			if (c >= 0 && ISDIGIT(c)) {
				yyError("no .<digit> floating literal anymore; put 0 before dot");
			}
			this.lstate = mrb_lex_state_enum.EXPR_DOT;
			return (MrbTokens)'.';

		case '0':
		case '1':
		case '2':
		case '3':
		case '4':
		case '5':
		case '6':
		case '7':
		case '8':
		case '9':
			return start_num(c);

		case ')':
		case ']':
		case '}':
			if (c != '}') {
				this.paren_nest--;
			}
			COND_LEXPOP();
			CMDARG_LEXPOP();
			if (c == ')')
				this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
			else
				this.lstate = mrb_lex_state_enum.EXPR_ENDARG;
			return (MrbTokens)c;

		case ':':
			c = nextc();
			if (c == ':') {
				if (IS_BEG() || this.lstate == mrb_lex_state_enum.EXPR_CLASS || IS_SPCARG(-1, space_seen)) {
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tCOLON3;
				}
				this.lstate = mrb_lex_state_enum.EXPR_DOT;
				return MrbTokens.tCOLON2;
			}
			if (IS_END() || ISSPACE(c)) {
				pushback(c);
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				return (MrbTokens)':';
			}
			pushback(c);
			this.lstate = mrb_lex_state_enum.EXPR_FNAME;
			return MrbTokens.tSYMBEG;

		case '/':
			if (IS_BEG()) {
				this.lex_strterm = new_strterm(mrb_string_type.str_regexp, '/', 0);
				return MrbTokens.tREGEXP_BEG;
			}
			if ((c = nextc()) == '=') {
				yylval.id = intern_c('/');
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				return MrbTokens.tOP_ASGN;
			}
			pushback(c);
			if (IS_SPCARG(c, space_seen)) {
				this.lex_strterm = new_strterm(mrb_string_type.str_regexp, '/', 0);
				return MrbTokens.tREGEXP_BEG;
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			return (MrbTokens)'/';

		case '^':
			if ((c = nextc()) == '=') {
				yylval.id = intern_c('^');
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				return MrbTokens.tOP_ASGN;
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			pushback(c);
			return (MrbTokens)'^';

		case ';':
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			return (MrbTokens)';';

		case ',':
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			return (MrbTokens)',';

		case '~':
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				if ((c = nextc()) != '@') {
					pushback(c);
				}
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			return (MrbTokens)'~';

		case '(':
			if (IS_BEG()) {
				c = (int)MrbTokens.tLPAREN;
			}
			else if (IS_SPCARG(-1, space_seen)) {
				c = (int)MrbTokens.tLPAREN_ARG;
			}
			this.paren_nest++;
			COND_PUSH(0);
			CMDARG_PUSH(0);
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			return (MrbTokens)c;

		case '[':
			this.paren_nest++;
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
				if ((c = nextc()) == ']') {
					if ((c = nextc()) == '=') {
						return MrbTokens.tASET;
					}
					pushback(c);
					return MrbTokens.tAREF;
				}
				pushback(c);
				return (MrbTokens)'[';
			}
			else if (IS_BEG()) {
				c = (int)MrbTokens.tLBRACK;
			}
			else if (IS_ARG() && space_seen) {
				c = (int)MrbTokens.tLBRACK;
			}
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			COND_PUSH(0);
			CMDARG_PUSH(0);
			return (MrbTokens)c;

		case '{':
			if (this.lpar_beg != 0 && this.lpar_beg == this.paren_nest) {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
				this.lpar_beg = 0;
				this.paren_nest--;
				COND_PUSH(0);
				CMDARG_PUSH(0);
				return MrbTokens.tLAMBEG;
			}
			if (IS_ARG() || this.lstate == mrb_lex_state_enum.EXPR_END || this.lstate == mrb_lex_state_enum.EXPR_ENDFN)
				c = '{'; /* block (primary) */
			else if (this.lstate == mrb_lex_state_enum.EXPR_ENDARG)
				c = (int)MrbTokens.tLBRACE_ARG; /* block (expr) */
			else
				c = (int)MrbTokens.tLBRACE; /* hash */
			COND_PUSH(0);
			CMDARG_PUSH(0);
			this.lstate = mrb_lex_state_enum.EXPR_BEG;
			return (MrbTokens)c;

		case '\\':
			c = nextc();
			if (c == '\n') {
				this.lineno++;
				this.column = 0;
				space_seen = true;
				continue; /* skip \\n */
			}
			pushback(c);
			return (MrbTokens)'\\';

		case '%':
			if (IS_BEG()) {
				c = nextc();
				return quotation(c);
			}
			/* single-pass loop: 'break' only skips to the operator handling */
			for (;;) {
				if ((c = nextc()) == '=') {
					yylval.id = intern_c('%');
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					return MrbTokens.tOP_ASGN;
				}
				if (IS_SPCARG(c, space_seen)) {
					return quotation(c);
				}
				break;
			}
			if (this.lstate == mrb_lex_state_enum.EXPR_FNAME || this.lstate == mrb_lex_state_enum.EXPR_DOT) {
				this.lstate = mrb_lex_state_enum.EXPR_ARG;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_BEG;
			}
			pushback(c);
			return (MrbTokens)'%';

		case '$':
			this.lstate = mrb_lex_state_enum.EXPR_END;
			token_column = newtok();
			c = nextc();
			if (c < 0) {
				yyError("incomplete global variable syntax");
				return 0;
			}
			switch (c) {
			case '_': /* $_: last read line string */
			case '~': /* $~: match-data */
			case '*': /* $*: argv */
			case '$': /* $$: pid */
			case '?': /* $?: last status */
			case '!': /* $!: error string */
			case '@': /* $@: error position */
			case '/': /* $/: input record separator */
			case '\\': /* $\: output record separator */
			case ';': /* $;: field separator */
			case ',': /* $,: output field separator */
			case '.': /* $.: last read line number */
			case '=': /* $=: ignorecase */
			case ':': /* $:: load path */
			case '<': /* $<: reading filename */
			case '>': /* $>: default output handle */
			case '\"': /* $": already loaded files */
				if (c == '_') {
					c = nextc();
					if (c >= 0 && identchar(c)) { /* if there is more after _ it is a variable */
						tokadd('$');
						tokadd(c);
						break;
					}
					pushback(c);
					c = '_';
				}
				tokadd('$');
				tokadd(c);
				tokfix();
				yylval.id = intern_cstr(tok());
				return MrbTokens.tGVAR;

			case '-':
				tokadd('$');
				tokadd(c);
				c = nextc();
				pushback(c);
				tokfix();
				yylval.id = intern_cstr(tok());
				return MrbTokens.tGVAR;

			case '&': /* $&: last match */
			case '`': /* $`: string before last match */
			case '\'': /* $': string after last match */
			case '+': /* $+: string matches last pattern */
				if (last_state == mrb_lex_state_enum.EXPR_FNAME) {
					tokadd('$');
					tokadd(c);
					tokfix();
					yylval.id = intern_cstr(tok());
					return MrbTokens.tGVAR;
				}
				yylval.nd = new_back_ref(c);
				return MrbTokens.tBACK_REF;

			case '1':
			case '2':
			case '3':
			case '4':
			case '5':
			case '6':
			case '7':
			case '8':
			case '9':
				do {
					tokadd(c);
					c = nextc();
				} while (c >= 0 && isdigit(c));
				pushback(c);
				if (last_state == mrb_lex_state_enum.EXPR_FNAME) {
					tokfix();
					yylval.id = intern_cstr(tok());
					return MrbTokens.tGVAR;
				}
				tokfix(); {
					Uint8Array t;
					ulong n = strtoul(tok(), 0, out t, 10);
					if (n > int.MaxValue) {
						yyError("capture group index must be <= {0}", int.MaxValue.ToString());
						return 0;
					}
					yylval.nd = new_nth_ref((int)n);
				}
				return MrbTokens.tNTH_REF;

			default:
				if (!identchar(c)) {
					pushback(c);
					return (MrbTokens)'$';
				}
				tokadd('$');
				break;

			case '0':
				tokadd('$');
				break;
			}
			break;

		case '@':
			c = nextc();
			token_column = newtok();
			tokadd('@');
			if (c == '@') {
				tokadd('@');
				c = nextc();
			}
			if (c < 0) {
				if (this.tidx == 1) {
					yyError("incomplete instance variable syntax");
				}
				else {
					yyError("incomplete class variable syntax");
				}
				return 0;
			}
			else if (isdigit(c)) {
				if (this.tidx == 1) {
					yyError("'@{0}' is not allowed as an instance variable name", ((char)c).ToString());
				}
				else {
					yyError("'@@{0}' is not allowed as a class variable name", ((char)c).ToString());
				}
				return 0;
			}
			if (!identchar(c)) {
				pushback(c);
				return (MrbTokens)'@';
			}
			break;

		case '_':
			token_column = newtok();
			break;

		default:
			if (!identchar(c)) {
				yyError("Invalid char '\\x{0}' in expression", c.ToString("X2"));
				continue;
			}

			token_column = newtok();
			break;
		}
		break;
	}

	/* c starts an identifier-like token: collect the remaining characters */
	do {
		tokadd(c);
		c = nextc();
		if (c < 0) break;
	} while (identchar(c));
	/* a line consisting of __END__ (token_column == 0) ends the script */
	if (token_column == 0 && toklen() == 7 && (c < 0 || c == '\n') &&
		strncmp(tok(), 0, MrbParser.UTF8StringToArray("__END__"), 0, toklen()) == 0)
		return (MrbTokens)(-1);

	switch ((char)tok()[0]) {
	case '@':
	case '$':
		pushback(c);
		break;
	default:
		/* a trailing '!' or '?' belongs to the name unless '=' follows */
		if ((c == '!' || c == '?') && !peek('=')) {
			tokadd(c);
		}
		else {
			pushback(c);
		}
		break;
	}
	tokfix();
	{
		MrbTokens result = 0;

		switch ((char)tok()[0]) {
		case '$':
			this.lstate = mrb_lex_state_enum.EXPR_END;
			result = MrbTokens.tGVAR;
			break;
		case '@':
			this.lstate = mrb_lex_state_enum.EXPR_END;
			if (tok()[1] == '@')
				result = MrbTokens.tCVAR;
			else
				result = MrbTokens.tIVAR;
			break;

		default:
			if (toklast() == '!' || toklast() == '?') {
				result = MrbTokens.tFID;
			}
			else {
				if (this.lstate == mrb_lex_state_enum.EXPR_FNAME) {
					/* after 'def' etc.: a trailing '=' makes a setter name */
					if ((c = nextc()) == '=' && !peek('~') && !peek('>') &&
						(!peek('=') || (peek_n('>', 1)))) {
						result = MrbTokens.tIDENTIFIER;
						tokadd(c);
						tokfix();
					}
					else {
						pushback(c);
					}
				}
				if (result == 0 && ISUPPER(tok()[0])) {
					result = MrbTokens.tCONSTANT;
				}
				else {
					result = MrbTokens.tIDENTIFIER;
				}
			}

			if (IS_LABEL_POSSIBLE(cmd_state)) {
				if (IS_LABEL_SUFFIX(0)) {
					this.lstate = mrb_lex_state_enum.EXPR_BEG;
					nextc();
					tokfix();
					yylval.id = intern_cstr(tok());
					return MrbTokens.tLABEL;
				}
			}
			if (this.lstate != mrb_lex_state_enum.EXPR_DOT) {
				kwtable kw;
				/* See if it is a reserved word. */
				kw = mrb_reserved_word(tok(), toklen());
				if (kw != null) {
					mrb_lex_state_enum state = this.lstate;
					yylval.num = this.lineno;
					this.lstate = kw.state;
					if (state == mrb_lex_state_enum.EXPR_FNAME) {
						yylval.id = intern_cstr(kw.name);
						return kw.id0;
					}
					if (this.lstate == mrb_lex_state_enum.EXPR_BEG) {
						this.cmd_start = true;
					}
					if (kw.id0 == MrbTokens.keyword_do) {
						if (this.lpar_beg != 0 && this.lpar_beg == this.paren_nest) {
							this.lpar_beg = 0;
							this.paren_nest--;
							return MrbTokens.keyword_do_LAMBDA;
						}
						if (COND_P() != 0) return MrbTokens.keyword_do_cond;
						if (CMDARG_P() != 0 && state != mrb_lex_state_enum.EXPR_CMDARG)
							return MrbTokens.keyword_do_block;
						if (state == mrb_lex_state_enum.EXPR_ENDARG || state == mrb_lex_state_enum.EXPR_BEG)
							return MrbTokens.keyword_do_block;
						return MrbTokens.keyword_do;
					}
					/* id0 is the statement form, id1 the modifier form */
					if (state == mrb_lex_state_enum.EXPR_BEG || state == mrb_lex_state_enum.EXPR_VALUE)
						return kw.id0;
					else {
						if (kw.id0 != kw.id1)
							this.lstate = mrb_lex_state_enum.EXPR_BEG;
						return kw.id1;
					}
				}
			}

			if (IS_BEG() || this.lstate == mrb_lex_state_enum.EXPR_DOT || IS_ARG()) {
				if (cmd_state) {
					this.lstate = mrb_lex_state_enum.EXPR_CMDARG;
				}
				else {
					this.lstate = mrb_lex_state_enum.EXPR_ARG;
				}
			}
			else if (this.lstate == mrb_lex_state_enum.EXPR_FNAME) {
				this.lstate = mrb_lex_state_enum.EXPR_ENDFN;
			}
			else {
				this.lstate = mrb_lex_state_enum.EXPR_END;
			}
			break;
		}
		{
			mrb_sym ident = intern_cstr(tok());

			yylval.id = ident;
#if false
			if (last_state != mrb_lex_state_enum.EXPR_DOT && islower(tok()[0]) && lvar_defined(ident)) {
				this.lstate = mrb_lex_state_enum.EXPR_END;
			}
#endif
		}
		return result;
	}
}
3475
/// <summary>
/// Resets the lexer state and runs the generated parser over the current
/// input. Any exception raised while parsing is reported through
/// <c>yyError</c> as "memory allocation error" and the tree is cleared.
/// </summary>
private void mrb_parser_parse()
{
	yylval = new MrbToken(filename);

	try {
		// start from a clean lexer state
		this.cmd_start = true;
		this.in_single = 0;
		this.in_def = 0;
		this.lex_strterm = null;

		// token buffer starts out on the built-in buffer
		this.tokbuf = this.buf;
		this.tsiz = MRB_PARSER_TOKBUF_SIZE;

		yyParse(this, null);
	}
	catch (Exception) {
		// every failure is reported with this one message; no partial tree is kept
		yyError("memory allocation error");
		this.tree = null;
	}
}
3494
/// <summary>
/// Makes <paramref name="f"/> the current source file. If the name is already
/// in <c>filename_table</c> its existing index is reused; otherwise the name
/// is appended and the index of the new entry becomes current.
/// </summary>
/// <param name="f">File name to switch to.</param>
public void mrb_parser_set_filename(string f)
{
	// line numbering restarts for the new file: 1 for the first file, 0 otherwise
	this.lineno = (this.filename_table_length > 0) ? 0 : 1;

	for (int i = 0; i < this.filename_table_length; ++i) {
		if (this.filename_table[i] == f) {
			this.current_filename_index = i;
			return;
		}
	}

	// The lookup above uses 0-based slots, so the entry appended below lives at
	// the current count. The previous "+ 1" pointed past the pushed entry.
	// NOTE(review): assumes filename_table_length tracks the number of entries
	// in filename_table - confirm against its declaration.
	this.current_filename_index = this.filename_table_length;
	this.filename_table.Push(f);
}
3511
/// <summary>
/// Parses a source buffer under the given file name.
/// </summary>
/// <param name="filename">Name registered as the current source file.</param>
/// <param name="s">Source bytes; reading starts at offset 0.</param>
public void mrb_parse_nstring(string filename, Uint8Array s)
{
	// input buffer and read position
	this.sp = 0;
	this.s = s;

	mrb_parser_set_filename(filename);
	mrb_parser_parse();
}
3520
/// <summary>
/// Parses Ruby source text with a fresh parser instance.
/// </summary>
/// <param name="text">Source text to parse.</param>
/// <param name="filename">File name used for the parse.</param>
/// <returns>The body of the resulting scope node, or <c>null</c> when the
/// parser did not produce a <c>scope_node</c>.</returns>
public static node parse(string text, string filename = "temporary.rb")
{
	var parser = new MrbParser();
	parser.mrb_parse_nstring(filename, UTF8StringToArray(text));

	var root = parser.tree as scope_node;
	return (root != null) ? root.body : null;
}
3530
/// <summary>
/// Simplifies a syntax tree by folding what can be computed statically:
/// negated numeric literals become literals, begin blocks with a single
/// statement collapse to that statement, and calls whose receiver implements
/// <c>IEvaluatable</c> are handed to the receiver.
/// </summary>
/// <param name="tree">Node to simplify; may be <c>null</c>.</param>
/// <returns>The simplified node, the original node when nothing applies, or
/// <c>null</c> when <paramref name="tree"/> is <c>null</c>.</returns>
public static node evaluate(node tree)
{
	// Child slots are passed in recursively; tolerate an absent child instead
	// of failing on tree.p below.
	if (tree == null)
		return null;

	var p = tree.p;

	// already in evaluated form
	if (tree is IEvaluatable)
		return tree;

	var begin = tree as begin_node;
	if (begin != null) {
		node[] progs = new node[0];
		foreach (var r in begin.progs) {
			progs.Push(evaluate(r));
		}
		// a block with exactly one statement is replaced by that statement
		if (progs.Length != 1)
			return new begin_node(p, progs);
		return progs[0];
	}

	var negate = tree as negate_node;
	if (negate != null) {
		var n = evaluate(negate.n);
		if (n is int_node) {
			var a = ((int_node)n).to_i();
			var c = UTF8StringToArray((-a).ToString());
			return new int_node(p, c, 10);
		}
		if (n is float_node) {
			var a = ((float_node)n).to_f();
			var c = UTF8StringToArray((-a).ToString());
			return new float_node(p, c);
		}
		// Operand is not a numeric literal: keep the original negation node.
		// Returning the bare operand here would silently drop the unary minus.
		return tree;
	}

	var dot2 = tree as dot2_node;
	if (dot2 != null) {
		var a = evaluate(dot2.a);
		var b = evaluate(dot2.b);
		return new dot2_node(p, a, b);
	}

	var dot3 = tree as dot3_node;
	if (dot3 != null) {
		var a = evaluate(dot3.a);
		var b = evaluate(dot3.b);
		return new dot3_node(p, a, b);
	}

	var call = tree as call_node;
	if (call != null) {
		var obj = evaluate(call.obj);
		var args = new node[0];
		foreach (var a in call.args) {
			args.Push(evaluate(a));
		}

		// let an evaluatable receiver compute the call; null means "cannot"
		var eva = obj as IEvaluatable;
		if (eva != null) {
			node ret;
			if ((ret = eva.evaluate(p.sym2name(call.method), args)) != null)
				return ret;
		}
		return new call_node(p, obj, call.method, args, call.block);
	}

	return tree;
}
3599
/// <summary>
/// Renders the parse result as Ruby source text.
/// </summary>
/// <returns>Code generated from the tree when one exists; otherwise the
/// decoded source buffer, or an empty string when there is no buffer.</returns>
public string to_ruby()
{
	if (tree == null) {
		// nothing parsed: fall back to the raw input, if any
		return (s != null) ? UTF8ArrayToString(s, 0) : "";
	}

	var output = new ruby_code_cond(filename);
	tree.to_ruby(output);
	return output.ToString();
}
/// <summary>Kind of the most recently scanned token, as an integer.</summary>
int MrbParser.yyInput.Token
{
	get
	{
		return (int)yylval.Kind;
	}
}
3613
/// <summary>Semantic value of the most recently scanned token.</summary>
object MrbParser.yyInput.Value
{
	get
	{
		return yylval.Value;
	}
}
3615
/// <summary>
/// Scans the next token and records its kind and text on <c>yylval</c>.
/// </summary>
/// <returns><c>true</c> when the scanned token kind is greater than zero.</returns>
bool MrbParser.yyInput.Advance()
{
	MrbTokens kind = parser_yylex();

	// first toklen() + 1 bytes of the token buffer, decoded as text
	var raw = tok().SubArray(0, toklen() + 1);
	yylval.SetToken(kind, MrbParser.UTF8ArrayToString(raw, 0));

	return kind > 0;
}
3623
/// <summary>
/// Writes a warning prefixed with the current file name, line and column.
/// </summary>
/// <param name="message">Composite format string.</param>
/// <param name="expected">Arguments substituted into <paramref name="message"/>.</param>
void yyConsoleOut.yyWarning(string message, object[] expected)
{
	string text = String.Format(message, expected);
	App.WriteLine($"{filename}({lineno},{column}): warning {text}");
}
3628
/// <summary>
/// Writes an error prefixed with the current file name, line and column.
/// </summary>
/// <param name="message">Composite format string.</param>
/// <param name="expected">Arguments substituted into <paramref name="message"/>.</param>
void yyConsoleOut.yyError(string message, object[] expected)
{
	string text = String.Format(message, expected);
	App.WriteLine($"{filename}({lineno},{column}): error {text}");
}
3633 }
3634}
Note: See TracBrowser for help on using the repository browser.