source: EcnlProtoTool/trunk/onigmo-6.1.3/src/regcomp.c@ 331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago

prototoolに関連するプロジェクトをnewlibからmuslを使うよう変更・更新
ntshellをnewlibの下位の実装から、muslのsyscallの実装に変更・更新
以下のOSSをアップデート
・mruby-1.3.0
・musl-1.1.18
・onigmo-6.1.3
・tcc-0.9.27
以下のOSSを追加
・openssl-1.1.0e
・curl-7.57.0
・zlib-1.2.11
以下のmrbgemsを追加
・iij/mruby-digest
・iij/mruby-env
・iij/mruby-errno
・iij/mruby-iijson
・iij/mruby-ipaddr
・iij/mruby-mock
・iij/mruby-require
・iij/mruby-tls-openssl

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 155.8 KB
RevLine 
[279]1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
[331]6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
[279]7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
34
35extern OnigCaseFoldType
36onig_get_default_case_fold_flag(void)
37{
38 return OnigDefaultCaseFoldFlag;
39}
40
41extern int
42onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
43{
44 OnigDefaultCaseFoldFlag = case_fold_flag;
45 return 0;
46}
47
48
49#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
50static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
51#endif
52
53#if 0
54static UChar*
55str_dup(UChar* s, UChar* end)
56{
57 ptrdiff_t len = end - s;
58
59 if (len > 0) {
60 UChar* r = (UChar* )xmalloc(len + 1);
61 CHECK_NULL_RETURN(r);
62 xmemcpy(r, s, len);
63 r[len] = (UChar )0;
64 return r;
65 }
66 else return NULL;
67}
68#endif
69
70static void
71swap_node(Node* a, Node* b)
72{
73 Node c;
74 c = *a; *a = *b; *b = c;
75
76 if (NTYPE(a) == NT_STR) {
77 StrNode* sn = NSTR(a);
78 if (sn->capa == 0) {
79 size_t len = sn->end - sn->s;
80 sn->s = sn->buf;
81 sn->end = sn->s + len;
82 }
83 }
84
85 if (NTYPE(b) == NT_STR) {
86 StrNode* sn = NSTR(b);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93}
94
95static OnigDistance
96distance_add(OnigDistance d1, OnigDistance d2)
97{
98 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
99 return ONIG_INFINITE_DISTANCE;
100 else {
101 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
102 else return ONIG_INFINITE_DISTANCE;
103 }
104}
105
106static OnigDistance
107distance_multiply(OnigDistance d, int m)
108{
109 if (m == 0) return 0;
110
111 if (d < ONIG_INFINITE_DISTANCE / m)
112 return d * m;
113 else
114 return ONIG_INFINITE_DISTANCE;
115}
116
117static int
118bitset_is_empty(BitSetRef bs)
119{
120 int i;
121 for (i = 0; i < BITSET_SIZE; i++) {
122 if (bs[i] != 0) return 0;
123 }
124 return 1;
125}
126
127#ifdef ONIG_DEBUG
128static int
129bitset_on_num(BitSetRef bs)
130{
131 int i, n;
132
133 n = 0;
134 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
135 if (BITSET_AT(bs, i)) n++;
136 }
137 return n;
138}
139#endif
140
141extern int
142onig_bbuf_init(BBuf* buf, OnigDistance size)
143{
144 if (size <= 0) {
145 size = 0;
146 buf->p = NULL;
147 }
148 else {
149 buf->p = (UChar* )xmalloc(size);
150 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
151 }
152
153 buf->alloc = (unsigned int )size;
154 buf->used = 0;
155 return 0;
156}
157
158
159#ifdef USE_SUBEXP_CALL
160
161static int
162unset_addr_list_init(UnsetAddrList* uslist, int size)
163{
164 UnsetAddr* p;
165
166 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
167 CHECK_NULL_RETURN_MEMERR(p);
168 uslist->num = 0;
169 uslist->alloc = size;
170 uslist->us = p;
171 return 0;
172}
173
174static void
175unset_addr_list_end(UnsetAddrList* uslist)
176{
177 if (IS_NOT_NULL(uslist->us))
178 xfree(uslist->us);
179}
180
181static int
182unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
183{
184 UnsetAddr* p;
185 int size;
186
187 if (uslist->num >= uslist->alloc) {
188 size = uslist->alloc * 2;
189 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
190 CHECK_NULL_RETURN_MEMERR(p);
191 uslist->alloc = size;
192 uslist->us = p;
193 }
194
195 uslist->us[uslist->num].offset = offset;
196 uslist->us[uslist->num].target = node;
197 uslist->num++;
198 return 0;
199}
200#endif /* USE_SUBEXP_CALL */
201
202
203static int
204add_opcode(regex_t* reg, int opcode)
205{
206 BBUF_ADD1(reg, opcode);
207 return 0;
208}
209
210#ifdef USE_COMBINATION_EXPLOSION_CHECK
211static int
212add_state_check_num(regex_t* reg, int num)
213{
214 StateCheckNumType n = (StateCheckNumType )num;
215
216 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
217 return 0;
218}
219#endif
220
221static int
222add_rel_addr(regex_t* reg, int addr)
223{
224 RelAddrType ra = (RelAddrType )addr;
225
226 BBUF_ADD(reg, &ra, SIZE_RELADDR);
227 return 0;
228}
229
230static int
231add_abs_addr(regex_t* reg, int addr)
232{
233 AbsAddrType ra = (AbsAddrType )addr;
234
235 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
236 return 0;
237}
238
239static int
240add_length(regex_t* reg, OnigDistance len)
241{
242 LengthType l = (LengthType )len;
243
244 BBUF_ADD(reg, &l, SIZE_LENGTH);
245 return 0;
246}
247
248static int
249add_mem_num(regex_t* reg, int num)
250{
251 MemNumType n = (MemNumType )num;
252
253 BBUF_ADD(reg, &n, SIZE_MEMNUM);
254 return 0;
255}
256
[331]257#if 0
[279]258static int
259add_pointer(regex_t* reg, void* addr)
260{
261 PointerType ptr = (PointerType )addr;
262
263 BBUF_ADD(reg, &ptr, SIZE_POINTER);
264 return 0;
265}
[331]266#endif
[279]267
268static int
269add_option(regex_t* reg, OnigOptionType option)
270{
271 BBUF_ADD(reg, &option, SIZE_OPTION);
272 return 0;
273}
274
275static int
276add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
277{
278 int r;
279
280 r = add_opcode(reg, opcode);
281 if (r) return r;
282 r = add_rel_addr(reg, addr);
283 return r;
284}
285
286static int
287add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
288{
289 BBUF_ADD(reg, bytes, len);
290 return 0;
291}
292
293static int
294add_bitset(regex_t* reg, BitSetRef bs)
295{
296 BBUF_ADD(reg, bs, SIZE_BITSET);
297 return 0;
298}
299
300static int
301add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
302{
303 int r;
304
305 r = add_opcode(reg, opcode);
306 if (r) return r;
307 r = add_option(reg, option);
308 return r;
309}
310
311static int compile_length_tree(Node* node, regex_t* reg);
312static int compile_tree(Node* node, regex_t* reg);
313
314
315#define IS_NEED_STR_LEN_OP_EXACT(op) \
316 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
317 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
318
319static int
320select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
321{
322 int op;
323 OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
324
325 if (ignore_case) {
326 switch (str_len) {
327 case 1: op = OP_EXACT1_IC; break;
328 default: op = OP_EXACTN_IC; break;
329 }
330 }
331 else {
332 switch (mb_len) {
333 case 1:
334 switch (str_len) {
335 case 1: op = OP_EXACT1; break;
336 case 2: op = OP_EXACT2; break;
337 case 3: op = OP_EXACT3; break;
338 case 4: op = OP_EXACT4; break;
339 case 5: op = OP_EXACT5; break;
340 default: op = OP_EXACTN; break;
341 }
342 break;
343
344 case 2:
345 switch (str_len) {
346 case 1: op = OP_EXACTMB2N1; break;
347 case 2: op = OP_EXACTMB2N2; break;
348 case 3: op = OP_EXACTMB2N3; break;
349 default: op = OP_EXACTMB2N; break;
350 }
351 break;
352
353 case 3:
354 op = OP_EXACTMB3N;
355 break;
356
357 default:
358 op = OP_EXACTMBN;
359 break;
360 }
361 }
362 return op;
363}
364
365static int
366compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
367{
368 int r;
369 int saved_num_null_check = reg->num_null_check;
370
371 if (empty_info != 0) {
372 r = add_opcode(reg, OP_NULL_CHECK_START);
373 if (r) return r;
374 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
375 if (r) return r;
376 reg->num_null_check++;
377 }
378
379 r = compile_tree(node, reg);
380 if (r) return r;
381
382 if (empty_info != 0) {
383 if (empty_info == NQ_TARGET_IS_EMPTY)
384 r = add_opcode(reg, OP_NULL_CHECK_END);
385 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
386 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
387 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
388 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
389
390 if (r) return r;
391 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
392 }
393 return r;
394}
395
396#ifdef USE_SUBEXP_CALL
397static int
398compile_call(CallNode* node, regex_t* reg)
399{
400 int r;
401
402 r = add_opcode(reg, OP_CALL);
403 if (r) return r;
404 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
405 node->target);
406 if (r) return r;
407 r = add_abs_addr(reg, 0 /*dummy addr.*/);
408 return r;
409}
410#endif
411
412static int
413compile_tree_n_times(Node* node, int n, regex_t* reg)
414{
415 int i, r;
416
417 for (i = 0; i < n; i++) {
418 r = compile_tree(node, reg);
419 if (r) return r;
420 }
421 return 0;
422}
423
424static int
425add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
426 regex_t* reg ARG_UNUSED, int ignore_case)
427{
428 int len;
429 int op = select_str_opcode(mb_len, byte_len, ignore_case);
430
431 len = SIZE_OPCODE;
432
433 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
434 if (IS_NEED_STR_LEN_OP_EXACT(op))
435 len += SIZE_LENGTH;
436
437 len += (int )byte_len;
438 return len;
439}
440
441static int
442add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
443 regex_t* reg, int ignore_case)
444{
445 int op = select_str_opcode(mb_len, byte_len, ignore_case);
446 add_opcode(reg, op);
447
448 if (op == OP_EXACTMBN)
449 add_length(reg, mb_len);
450
451 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
452 if (op == OP_EXACTN_IC)
453 add_length(reg, byte_len);
454 else
455 add_length(reg, byte_len / mb_len);
456 }
457
458 add_bytes(reg, s, byte_len);
459 return 0;
460}
461
462
463static int
464compile_length_string_node(Node* node, regex_t* reg)
465{
466 int rlen, r, len, prev_len, blen, ambig;
467 OnigEncoding enc = reg->enc;
468 UChar *p, *prev;
469 StrNode* sn;
470
471 sn = NSTR(node);
472 if (sn->end <= sn->s)
473 return 0;
474
475 ambig = NSTRING_IS_AMBIG(node);
476
477 p = prev = sn->s;
[331]478 prev_len = enclen(enc, p, sn->end);
[279]479 p += prev_len;
480 blen = prev_len;
481 rlen = 0;
482
483 for (; p < sn->end; ) {
[331]484 len = enclen(enc, p, sn->end);
[279]485 if (len == prev_len || ambig) {
486 blen += len;
487 }
488 else {
489 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
490 rlen += r;
491 prev = p;
492 blen = len;
493 prev_len = len;
494 }
495 p += len;
496 }
497 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
498 rlen += r;
499 return rlen;
500}
501
502static int
503compile_length_string_raw_node(StrNode* sn, regex_t* reg)
504{
505 if (sn->end <= sn->s)
506 return 0;
507
508 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
509}
510
511static int
512compile_string_node(Node* node, regex_t* reg)
513{
514 int r, len, prev_len, blen, ambig;
515 OnigEncoding enc = reg->enc;
516 UChar *p, *prev, *end;
517 StrNode* sn;
518
519 sn = NSTR(node);
520 if (sn->end <= sn->s)
521 return 0;
522
523 end = sn->end;
524 ambig = NSTRING_IS_AMBIG(node);
525
526 p = prev = sn->s;
[331]527 prev_len = enclen(enc, p, end);
[279]528 p += prev_len;
529 blen = prev_len;
530
531 for (; p < end; ) {
[331]532 len = enclen(enc, p, end);
[279]533 if (len == prev_len || ambig) {
534 blen += len;
535 }
536 else {
537 r = add_compile_string(prev, prev_len, blen, reg, ambig);
538 if (r) return r;
539
540 prev = p;
541 blen = len;
542 prev_len = len;
543 }
544
545 p += len;
546 }
547 return add_compile_string(prev, prev_len, blen, reg, ambig);
548}
549
550static int
551compile_string_raw_node(StrNode* sn, regex_t* reg)
552{
553 if (sn->end <= sn->s)
554 return 0;
555
556 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
557}
558
559static int
560add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
561{
562#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
563 add_length(reg, mbuf->used);
564 return add_bytes(reg, mbuf->p, mbuf->used);
565#else
566 int r, pad_size;
567 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
568
569 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
570 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
571 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
572
573 r = add_bytes(reg, mbuf->p, mbuf->used);
574
575 /* padding for return value from compile_length_cclass_node() to be fix. */
576 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
577 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
578 return r;
579#endif
580}
581
582static int
583compile_length_cclass_node(CClassNode* cc, regex_t* reg)
584{
585 int len;
586
587 if (IS_NULL(cc->mbuf)) {
588 len = SIZE_OPCODE + SIZE_BITSET;
589 }
590 else {
591 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
592 len = SIZE_OPCODE;
593 }
594 else {
595 len = SIZE_OPCODE + SIZE_BITSET;
596 }
597#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
598 len += SIZE_LENGTH + cc->mbuf->used;
599#else
600 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
601#endif
602 }
603
604 return len;
605}
606
607static int
608compile_cclass_node(CClassNode* cc, regex_t* reg)
609{
610 int r;
611
612 if (IS_NULL(cc->mbuf)) {
613 if (IS_NCCLASS_NOT(cc))
614 add_opcode(reg, OP_CCLASS_NOT);
615 else
616 add_opcode(reg, OP_CCLASS);
617
618 r = add_bitset(reg, cc->bs);
619 }
620 else {
621 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
622 if (IS_NCCLASS_NOT(cc))
[331]623 add_opcode(reg, OP_CCLASS_MB_NOT);
[279]624 else
[331]625 add_opcode(reg, OP_CCLASS_MB);
[279]626
627 r = add_multi_byte_cclass(cc->mbuf, reg);
628 }
629 else {
630 if (IS_NCCLASS_NOT(cc))
[331]631 add_opcode(reg, OP_CCLASS_MIX_NOT);
[279]632 else
[331]633 add_opcode(reg, OP_CCLASS_MIX);
[279]634
635 r = add_bitset(reg, cc->bs);
636 if (r) return r;
637 r = add_multi_byte_cclass(cc->mbuf, reg);
638 }
639 }
640
641 return r;
642}
643
644static int
645entry_repeat_range(regex_t* reg, int id, int lower, int upper)
646{
647#define REPEAT_RANGE_ALLOC 4
648
649 OnigRepeatRange* p;
650
651 if (reg->repeat_range_alloc == 0) {
652 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
653 CHECK_NULL_RETURN_MEMERR(p);
654 reg->repeat_range = p;
655 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
656 }
657 else if (reg->repeat_range_alloc <= id) {
658 int n;
659 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
660 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
661 sizeof(OnigRepeatRange) * n);
662 CHECK_NULL_RETURN_MEMERR(p);
663 reg->repeat_range = p;
664 reg->repeat_range_alloc = n;
665 }
666 else {
667 p = reg->repeat_range;
668 }
669
670 p[id].lower = lower;
671 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
672 return 0;
673}
674
675static int
676compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
677 regex_t* reg)
678{
679 int r;
680 int num_repeat = reg->num_repeat;
681
682 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
683 if (r) return r;
684 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
685 reg->num_repeat++;
686 if (r) return r;
687 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
688 if (r) return r;
689
690 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
691 if (r) return r;
692
693 r = compile_tree_empty_check(qn->target, reg, empty_info);
694 if (r) return r;
695
696 if (
697#ifdef USE_SUBEXP_CALL
698 reg->num_call > 0 ||
699#endif
700 IS_QUANTIFIER_IN_REPEAT(qn)) {
701 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
702 }
703 else {
704 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
705 }
706 if (r) return r;
707 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
708 return r;
709}
710
711static int
712is_anychar_star_quantifier(QtfrNode* qn)
713{
714 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
715 NTYPE(qn->target) == NT_CANY)
716 return 1;
717 else
718 return 0;
719}
720
721#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
722#define CKN_ON (ckn > 0)
723
724#ifdef USE_COMBINATION_EXPLOSION_CHECK
725
726static int
727compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
728{
729 int len, mod_tlen, cklen;
730 int ckn;
731 int infinite = IS_REPEAT_INFINITE(qn->upper);
732 int empty_info = qn->target_empty_info;
733 int tlen = compile_length_tree(qn->target, reg);
734
735 if (tlen < 0) return tlen;
736
737 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
738
739 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
740
741 /* anychar repeat */
742 if (NTYPE(qn->target) == NT_CANY) {
743 if (qn->greedy && infinite) {
744 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
[331]745 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
[279]746 else
[331]747 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
[279]748 }
749 }
750
751 if (empty_info != 0)
752 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
753 else
754 mod_tlen = tlen;
755
756 if (infinite && qn->lower <= 1) {
757 if (qn->greedy) {
758 if (qn->lower == 1)
759 len = SIZE_OP_JUMP;
760 else
761 len = 0;
762
763 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
764 }
765 else {
766 if (qn->lower == 0)
767 len = SIZE_OP_JUMP;
768 else
769 len = 0;
770
771 len += mod_tlen + SIZE_OP_PUSH + cklen;
772 }
773 }
774 else if (qn->upper == 0) {
775 if (qn->is_refered != 0) /* /(?<n>..){0}/ */
776 len = SIZE_OP_JUMP + tlen;
777 else
778 len = 0;
779 }
780 else if (qn->upper == 1 && qn->greedy) {
781 if (qn->lower == 0) {
782 if (CKN_ON) {
783 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
784 }
785 else {
786 len = SIZE_OP_PUSH + tlen;
787 }
788 }
789 else {
790 len = tlen;
791 }
792 }
793 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
794 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
795 }
796 else {
797 len = SIZE_OP_REPEAT_INC
798 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
799 if (CKN_ON)
800 len += SIZE_OP_STATE_CHECK;
801 }
802
803 return len;
804}
805
806static int
807compile_quantifier_node(QtfrNode* qn, regex_t* reg)
808{
809 int r, mod_tlen;
810 int ckn;
811 int infinite = IS_REPEAT_INFINITE(qn->upper);
812 int empty_info = qn->target_empty_info;
813 int tlen = compile_length_tree(qn->target, reg);
814
815 if (tlen < 0) return tlen;
816
817 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
818
819 if (is_anychar_star_quantifier(qn)) {
820 r = compile_tree_n_times(qn->target, qn->lower, reg);
821 if (r) return r;
822 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
823 if (IS_MULTILINE(reg->options))
824 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
825 else
826 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
827 if (r) return r;
828 if (CKN_ON) {
829 r = add_state_check_num(reg, ckn);
830 if (r) return r;
831 }
832
833 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
834 }
835 else {
836 if (IS_MULTILINE(reg->options)) {
837 r = add_opcode(reg, (CKN_ON ?
838 OP_STATE_CHECK_ANYCHAR_ML_STAR
839 : OP_ANYCHAR_ML_STAR));
840 }
841 else {
842 r = add_opcode(reg, (CKN_ON ?
843 OP_STATE_CHECK_ANYCHAR_STAR
844 : OP_ANYCHAR_STAR));
845 }
846 if (r) return r;
847 if (CKN_ON)
848 r = add_state_check_num(reg, ckn);
849
850 return r;
851 }
852 }
853
854 if (empty_info != 0)
855 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
856 else
857 mod_tlen = tlen;
858
859 if (infinite && qn->lower <= 1) {
860 if (qn->greedy) {
861 if (qn->lower == 1) {
862 r = add_opcode_rel_addr(reg, OP_JUMP,
863 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
864 if (r) return r;
865 }
866
867 if (CKN_ON) {
868 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
869 if (r) return r;
870 r = add_state_check_num(reg, ckn);
871 if (r) return r;
872 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
873 }
874 else {
875 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
876 }
877 if (r) return r;
878 r = compile_tree_empty_check(qn->target, reg, empty_info);
879 if (r) return r;
880 r = add_opcode_rel_addr(reg, OP_JUMP,
881 -(mod_tlen + (int )SIZE_OP_JUMP
882 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
883 }
884 else {
885 if (qn->lower == 0) {
886 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
887 if (r) return r;
888 }
889 r = compile_tree_empty_check(qn->target, reg, empty_info);
890 if (r) return r;
891 if (CKN_ON) {
892 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
893 if (r) return r;
894 r = add_state_check_num(reg, ckn);
895 if (r) return r;
896 r = add_rel_addr(reg,
897 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
898 }
899 else
900 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
901 }
902 }
903 else if (qn->upper == 0) {
904 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
905 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
906 if (r) return r;
907 r = compile_tree(qn->target, reg);
908 }
909 else
910 r = 0;
911 }
912 else if (qn->upper == 1 && qn->greedy) {
913 if (qn->lower == 0) {
914 if (CKN_ON) {
915 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
916 if (r) return r;
917 r = add_state_check_num(reg, ckn);
918 if (r) return r;
919 r = add_rel_addr(reg, tlen);
920 }
921 else {
922 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
923 }
924 if (r) return r;
925 }
926
927 r = compile_tree(qn->target, reg);
928 }
929 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
930 if (CKN_ON) {
931 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
932 if (r) return r;
933 r = add_state_check_num(reg, ckn);
934 if (r) return r;
935 r = add_rel_addr(reg, SIZE_OP_JUMP);
936 }
937 else {
938 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
939 }
940
941 if (r) return r;
942 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
943 if (r) return r;
944 r = compile_tree(qn->target, reg);
945 }
946 else {
947 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
948 if (CKN_ON) {
949 if (r) return r;
950 r = add_opcode(reg, OP_STATE_CHECK);
951 if (r) return r;
952 r = add_state_check_num(reg, ckn);
953 }
954 }
955 return r;
956}
957
958#else /* USE_COMBINATION_EXPLOSION_CHECK */
959
960static int
961compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
962{
963 int len, mod_tlen;
964 int infinite = IS_REPEAT_INFINITE(qn->upper);
965 int empty_info = qn->target_empty_info;
966 int tlen = compile_length_tree(qn->target, reg);
967
968 if (tlen < 0) return tlen;
969
970 /* anychar repeat */
971 if (NTYPE(qn->target) == NT_CANY) {
972 if (qn->greedy && infinite) {
973 if (IS_NOT_NULL(qn->next_head_exact))
[331]974 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
[279]975 else
[331]976 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
[279]977 }
978 }
979
980 if (empty_info != 0)
981 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
982 else
983 mod_tlen = tlen;
984
985 if (infinite &&
986 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
987 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
988 len = SIZE_OP_JUMP;
989 }
990 else {
991 len = tlen * qn->lower;
992 }
993
994 if (qn->greedy) {
[331]995#ifdef USE_OP_PUSH_OR_JUMP_EXACT
[279]996 if (IS_NOT_NULL(qn->head_exact))
997 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
[331]998 else
999#endif
1000 if (IS_NOT_NULL(qn->next_head_exact))
[279]1001 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1002 else
1003 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1004 }
1005 else
1006 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1007 }
1008 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1009 len = SIZE_OP_JUMP + tlen;
1010 }
1011 else if (!infinite && qn->greedy &&
1012 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1013 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1014 len = tlen * qn->lower;
1015 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1016 }
1017 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1018 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1019 }
1020 else {
1021 len = SIZE_OP_REPEAT_INC
1022 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1023 }
1024
1025 return len;
1026}
1027
1028static int
1029compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1030{
1031 int i, r, mod_tlen;
1032 int infinite = IS_REPEAT_INFINITE(qn->upper);
1033 int empty_info = qn->target_empty_info;
1034 int tlen = compile_length_tree(qn->target, reg);
1035
1036 if (tlen < 0) return tlen;
1037
1038 if (is_anychar_star_quantifier(qn)) {
1039 r = compile_tree_n_times(qn->target, qn->lower, reg);
1040 if (r) return r;
1041 if (IS_NOT_NULL(qn->next_head_exact)) {
1042 if (IS_MULTILINE(reg->options))
1043 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1044 else
1045 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1046 if (r) return r;
1047 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1048 }
1049 else {
1050 if (IS_MULTILINE(reg->options))
1051 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1052 else
1053 return add_opcode(reg, OP_ANYCHAR_STAR);
1054 }
1055 }
1056
1057 if (empty_info != 0)
1058 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1059 else
1060 mod_tlen = tlen;
1061
1062 if (infinite &&
1063 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1064 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1065 if (qn->greedy) {
[331]1066#ifdef USE_OP_PUSH_OR_JUMP_EXACT
[279]1067 if (IS_NOT_NULL(qn->head_exact))
1068 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
[331]1069 else
1070#endif
1071 if (IS_NOT_NULL(qn->next_head_exact))
[279]1072 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1073 else
1074 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1075 }
1076 else {
1077 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1078 }
1079 if (r) return r;
1080 }
1081 else {
1082 r = compile_tree_n_times(qn->target, qn->lower, reg);
1083 if (r) return r;
1084 }
1085
1086 if (qn->greedy) {
[331]1087#ifdef USE_OP_PUSH_OR_JUMP_EXACT
[279]1088 if (IS_NOT_NULL(qn->head_exact)) {
1089 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1090 mod_tlen + SIZE_OP_JUMP);
1091 if (r) return r;
1092 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1093 r = compile_tree_empty_check(qn->target, reg, empty_info);
1094 if (r) return r;
1095 r = add_opcode_rel_addr(reg, OP_JUMP,
1096 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1097 }
[331]1098 else
1099#endif
1100 if (IS_NOT_NULL(qn->next_head_exact)) {
[279]1101 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1102 mod_tlen + SIZE_OP_JUMP);
1103 if (r) return r;
1104 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1105 r = compile_tree_empty_check(qn->target, reg, empty_info);
1106 if (r) return r;
1107 r = add_opcode_rel_addr(reg, OP_JUMP,
1108 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1109 }
1110 else {
1111 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1112 if (r) return r;
1113 r = compile_tree_empty_check(qn->target, reg, empty_info);
1114 if (r) return r;
1115 r = add_opcode_rel_addr(reg, OP_JUMP,
1116 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1117 }
1118 }
1119 else {
1120 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1121 if (r) return r;
1122 r = compile_tree_empty_check(qn->target, reg, empty_info);
1123 if (r) return r;
1124 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1125 }
1126 }
1127 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1128 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1129 if (r) return r;
1130 r = compile_tree(qn->target, reg);
1131 }
1132 else if (!infinite && qn->greedy &&
1133 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1134 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1135 int n = qn->upper - qn->lower;
1136
1137 r = compile_tree_n_times(qn->target, qn->lower, reg);
1138 if (r) return r;
1139
1140 for (i = 0; i < n; i++) {
1141 r = add_opcode_rel_addr(reg, OP_PUSH,
1142 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1143 if (r) return r;
1144 r = compile_tree(qn->target, reg);
1145 if (r) return r;
1146 }
1147 }
1148 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1149 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1150 if (r) return r;
1151 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1152 if (r) return r;
1153 r = compile_tree(qn->target, reg);
1154 }
1155 else {
1156 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1157 }
1158 return r;
1159}
1160#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1161
1162static int
1163compile_length_option_node(EncloseNode* node, regex_t* reg)
1164{
1165 int tlen;
1166 OnigOptionType prev = reg->options;
1167
1168 reg->options = node->option;
1169 tlen = compile_length_tree(node->target, reg);
1170 reg->options = prev;
1171
1172 if (tlen < 0) return tlen;
1173
1174 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1175 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1176 + tlen + SIZE_OP_SET_OPTION;
1177 }
1178 else
1179 return tlen;
1180}
1181
1182static int
1183compile_option_node(EncloseNode* node, regex_t* reg)
1184{
1185 int r;
1186 OnigOptionType prev = reg->options;
1187
1188 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1189 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1190 if (r) return r;
1191 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1192 if (r) return r;
1193 r = add_opcode(reg, OP_FAIL);
1194 if (r) return r;
1195 }
1196
1197 reg->options = node->option;
1198 r = compile_tree(node->target, reg);
1199 reg->options = prev;
1200
1201 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1202 if (r) return r;
1203 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1204 }
1205 return r;
1206}
1207
1208static int
1209compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1210{
1211 int len;
1212 int tlen;
1213
1214 if (node->type == ENCLOSE_OPTION)
1215 return compile_length_option_node(node, reg);
1216
1217 if (node->target) {
1218 tlen = compile_length_tree(node->target, reg);
1219 if (tlen < 0) return tlen;
1220 }
1221 else
1222 tlen = 0;
1223
1224 switch (node->type) {
1225 case ENCLOSE_MEMORY:
1226#ifdef USE_SUBEXP_CALL
1227 if (IS_ENCLOSE_CALLED(node)) {
1228 len = SIZE_OP_MEMORY_START_PUSH + tlen
1229 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1230 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1231 len += (IS_ENCLOSE_RECURSION(node)
1232 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1233 else
1234 len += (IS_ENCLOSE_RECURSION(node)
1235 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1236 }
[331]1237 else if (IS_ENCLOSE_RECURSION(node)) {
1238 len = SIZE_OP_MEMORY_START_PUSH;
1239 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1240 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
1241 }
[279]1242 else
1243#endif
1244 {
1245 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1246 len = SIZE_OP_MEMORY_START_PUSH;
1247 else
1248 len = SIZE_OP_MEMORY_START;
1249
1250 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1251 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1252 }
1253 break;
1254
1255 case ENCLOSE_STOP_BACKTRACK:
1256 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1257 QtfrNode* qn = NQTFR(node->target);
1258 tlen = compile_length_tree(qn->target, reg);
1259 if (tlen < 0) return tlen;
1260
1261 len = tlen * qn->lower
1262 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1263 }
1264 else {
1265 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1266 }
1267 break;
1268
1269 case ENCLOSE_CONDITION:
1270 len = SIZE_OP_CONDITION;
1271 if (NTYPE(node->target) == NT_ALT) {
1272 Node* x = node->target;
1273
1274 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1275 if (tlen < 0) return tlen;
1276 len += tlen + SIZE_OP_JUMP;
1277 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1278 x = NCDR(x);
1279 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1280 if (tlen < 0) return tlen;
1281 len += tlen;
1282 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1283 }
1284 else {
1285 return ONIGERR_PARSER_BUG;
1286 }
1287 break;
1288
[331]1289 case ENCLOSE_ABSENT:
1290 len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
1291 break;
1292
[279]1293 default:
1294 return ONIGERR_TYPE_BUG;
1295 break;
1296 }
1297
1298 return len;
1299}
1300
1301static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1302
1303static int
1304compile_enclose_node(EncloseNode* node, regex_t* reg)
1305{
1306 int r, len;
1307
1308 if (node->type == ENCLOSE_OPTION)
1309 return compile_option_node(node, reg);
1310
1311 switch (node->type) {
1312 case ENCLOSE_MEMORY:
1313#ifdef USE_SUBEXP_CALL
1314 if (IS_ENCLOSE_CALLED(node)) {
1315 r = add_opcode(reg, OP_CALL);
1316 if (r) return r;
1317 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1318 node->state |= NST_ADDR_FIXED;
1319 r = add_abs_addr(reg, (int )node->call_addr);
1320 if (r) return r;
1321 len = compile_length_tree(node->target, reg);
1322 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1323 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1324 len += (IS_ENCLOSE_RECURSION(node)
1325 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1326 else
1327 len += (IS_ENCLOSE_RECURSION(node)
1328 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1329
1330 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1331 if (r) return r;
1332 }
1333#endif
1334 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1335 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1336 else
1337 r = add_opcode(reg, OP_MEMORY_START);
1338 if (r) return r;
1339 r = add_mem_num(reg, node->regnum);
1340 if (r) return r;
1341 r = compile_tree(node->target, reg);
1342 if (r) return r;
1343#ifdef USE_SUBEXP_CALL
1344 if (IS_ENCLOSE_CALLED(node)) {
1345 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1346 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1347 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1348 else
1349 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1350 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1351
1352 if (r) return r;
1353 r = add_mem_num(reg, node->regnum);
1354 if (r) return r;
1355 r = add_opcode(reg, OP_RETURN);
1356 }
[331]1357 else if (IS_ENCLOSE_RECURSION(node)) {
1358 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1359 r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
1360 else
1361 r = add_opcode(reg, OP_MEMORY_END_REC);
1362 if (r) return r;
1363 r = add_mem_num(reg, node->regnum);
1364 }
[279]1365 else
1366#endif
1367 {
1368 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1369 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1370 else
1371 r = add_opcode(reg, OP_MEMORY_END);
1372 if (r) return r;
1373 r = add_mem_num(reg, node->regnum);
1374 }
1375 break;
1376
1377 case ENCLOSE_STOP_BACKTRACK:
1378 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1379 QtfrNode* qn = NQTFR(node->target);
1380 r = compile_tree_n_times(qn->target, qn->lower, reg);
1381 if (r) return r;
1382
1383 len = compile_length_tree(qn->target, reg);
1384 if (len < 0) return len;
1385
1386 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1387 if (r) return r;
1388 r = compile_tree(qn->target, reg);
1389 if (r) return r;
1390 r = add_opcode(reg, OP_POP);
1391 if (r) return r;
1392 r = add_opcode_rel_addr(reg, OP_JUMP,
1393 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1394 }
1395 else {
1396 r = add_opcode(reg, OP_PUSH_STOP_BT);
1397 if (r) return r;
1398 r = compile_tree(node->target, reg);
1399 if (r) return r;
1400 r = add_opcode(reg, OP_POP_STOP_BT);
1401 }
1402 break;
1403
1404 case ENCLOSE_CONDITION:
1405 r = add_opcode(reg, OP_CONDITION);
1406 if (r) return r;
1407 r = add_mem_num(reg, node->regnum);
1408 if (r) return r;
1409
1410 if (NTYPE(node->target) == NT_ALT) {
1411 Node* x = node->target;
1412 int len2;
1413
1414 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1415 if (len < 0) return len;
1416 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1417 x = NCDR(x);
1418 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1419 if (len2 < 0) return len2;
1420 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1421
1422 x = node->target;
1423 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1424 if (r) return r;
1425 r = compile_tree(NCAR(x), reg); /* yes-node */
1426 if (r) return r;
1427 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1428 if (r) return r;
1429 x = NCDR(x);
1430 r = compile_tree(NCAR(x), reg); /* no-node */
1431 }
1432 else {
1433 return ONIGERR_PARSER_BUG;
1434 }
1435 break;
1436
[331]1437 case ENCLOSE_ABSENT:
1438 len = compile_length_tree(node->target, reg);
1439 if (len < 0) return len;
1440
1441 r = add_opcode(reg, OP_PUSH_ABSENT_POS);
1442 if (r) return r;
1443 r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
1444 if (r) return r;
1445 r = compile_tree(node->target, reg);
1446 if (r) return r;
1447 r = add_opcode(reg, OP_ABSENT_END);
1448 break;
1449
[279]1450 default:
1451 return ONIGERR_TYPE_BUG;
1452 break;
1453 }
1454
1455 return r;
1456}
1457
1458static int
1459compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1460{
1461 int len;
1462 int tlen = 0;
1463
1464 if (node->target) {
1465 tlen = compile_length_tree(node->target, reg);
1466 if (tlen < 0) return tlen;
1467 }
1468
1469 switch (node->type) {
1470 case ANCHOR_PREC_READ:
1471 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1472 break;
1473 case ANCHOR_PREC_READ_NOT:
1474 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1475 break;
1476 case ANCHOR_LOOK_BEHIND:
1477 len = SIZE_OP_LOOK_BEHIND + tlen;
1478 break;
1479 case ANCHOR_LOOK_BEHIND_NOT:
1480 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1481 break;
1482
1483 default:
1484 len = SIZE_OPCODE;
1485 break;
1486 }
1487
1488 return len;
1489}
1490
1491static int
1492compile_anchor_node(AnchorNode* node, regex_t* reg)
1493{
1494 int r, len;
1495
1496 switch (node->type) {
1497 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1498 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1499 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1500 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1501 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1502 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1503
1504 case ANCHOR_WORD_BOUND:
1505 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1506 else r = add_opcode(reg, OP_WORD_BOUND);
1507 break;
1508 case ANCHOR_NOT_WORD_BOUND:
1509 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1510 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1511 break;
1512#ifdef USE_WORD_BEGIN_END
1513 case ANCHOR_WORD_BEGIN:
1514 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1515 else r = add_opcode(reg, OP_WORD_BEGIN);
1516 break;
1517 case ANCHOR_WORD_END:
1518 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1519 else r = add_opcode(reg, OP_WORD_END);
1520 break;
1521#endif
1522 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1523
1524 case ANCHOR_PREC_READ:
1525 r = add_opcode(reg, OP_PUSH_POS);
1526 if (r) return r;
1527 r = compile_tree(node->target, reg);
1528 if (r) return r;
1529 r = add_opcode(reg, OP_POP_POS);
1530 break;
1531
1532 case ANCHOR_PREC_READ_NOT:
1533 len = compile_length_tree(node->target, reg);
1534 if (len < 0) return len;
1535 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1536 if (r) return r;
1537 r = compile_tree(node->target, reg);
1538 if (r) return r;
1539 r = add_opcode(reg, OP_FAIL_POS);
1540 break;
1541
1542 case ANCHOR_LOOK_BEHIND:
1543 {
1544 int n;
1545 r = add_opcode(reg, OP_LOOK_BEHIND);
1546 if (r) return r;
1547 if (node->char_len < 0) {
1548 r = get_char_length_tree(node->target, reg, &n);
1549 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1550 }
1551 else
1552 n = node->char_len;
1553 r = add_length(reg, n);
1554 if (r) return r;
1555 r = compile_tree(node->target, reg);
1556 }
1557 break;
1558
1559 case ANCHOR_LOOK_BEHIND_NOT:
1560 {
1561 int n;
1562 len = compile_length_tree(node->target, reg);
1563 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1564 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1565 if (r) return r;
1566 if (node->char_len < 0) {
1567 r = get_char_length_tree(node->target, reg, &n);
1568 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1569 }
1570 else
1571 n = node->char_len;
1572 r = add_length(reg, n);
1573 if (r) return r;
1574 r = compile_tree(node->target, reg);
1575 if (r) return r;
1576 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1577 }
1578 break;
1579
1580 default:
1581 return ONIGERR_TYPE_BUG;
1582 break;
1583 }
1584
1585 return r;
1586}
1587
1588static int
1589compile_length_tree(Node* node, regex_t* reg)
1590{
1591 int len, type, r;
1592
1593 type = NTYPE(node);
1594 switch (type) {
1595 case NT_LIST:
1596 len = 0;
1597 do {
1598 r = compile_length_tree(NCAR(node), reg);
1599 if (r < 0) return r;
1600 len += r;
1601 } while (IS_NOT_NULL(node = NCDR(node)));
1602 r = len;
1603 break;
1604
1605 case NT_ALT:
1606 {
1607 int n = 0;
1608 len = 0;
1609 do {
[331]1610 r = compile_length_tree(NCAR(node), reg);
1611 if (r < 0) return r;
1612 len += r;
1613 n++;
[279]1614 } while (IS_NOT_NULL(node = NCDR(node)));
1615 r = len;
1616 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1617 }
1618 break;
1619
1620 case NT_STR:
1621 if (NSTRING_IS_RAW(node))
1622 r = compile_length_string_raw_node(NSTR(node), reg);
1623 else
1624 r = compile_length_string_node(node, reg);
1625 break;
1626
1627 case NT_CCLASS:
1628 r = compile_length_cclass_node(NCCLASS(node), reg);
1629 break;
1630
1631 case NT_CTYPE:
1632 case NT_CANY:
1633 r = SIZE_OPCODE;
1634 break;
1635
1636 case NT_BREF:
1637 {
1638 BRefNode* br = NBREF(node);
1639
1640#ifdef USE_BACKREF_WITH_LEVEL
1641 if (IS_BACKREF_NEST_LEVEL(br)) {
[331]1642 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
[279]1643 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1644 }
1645 else
1646#endif
1647 if (br->back_num == 1) {
1648 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1649 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1650 }
1651 else {
1652 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1653 }
1654 }
1655 break;
1656
1657#ifdef USE_SUBEXP_CALL
1658 case NT_CALL:
1659 r = SIZE_OP_CALL;
1660 break;
1661#endif
1662
1663 case NT_QTFR:
1664 r = compile_length_quantifier_node(NQTFR(node), reg);
1665 break;
1666
1667 case NT_ENCLOSE:
1668 r = compile_length_enclose_node(NENCLOSE(node), reg);
1669 break;
1670
1671 case NT_ANCHOR:
1672 r = compile_length_anchor_node(NANCHOR(node), reg);
1673 break;
1674
1675 default:
1676 return ONIGERR_TYPE_BUG;
1677 break;
1678 }
1679
1680 return r;
1681}
1682
1683static int
1684compile_tree(Node* node, regex_t* reg)
1685{
1686 int n, type, len, pos, r = 0;
1687
1688 type = NTYPE(node);
1689 switch (type) {
1690 case NT_LIST:
1691 do {
1692 r = compile_tree(NCAR(node), reg);
1693 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1694 break;
1695
1696 case NT_ALT:
1697 {
1698 Node* x = node;
1699 len = 0;
1700 do {
1701 len += compile_length_tree(NCAR(x), reg);
1702 if (NCDR(x) != NULL) {
1703 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1704 }
1705 } while (IS_NOT_NULL(x = NCDR(x)));
1706 pos = reg->used + len; /* goal position */
1707
1708 do {
1709 len = compile_length_tree(NCAR(node), reg);
1710 if (IS_NOT_NULL(NCDR(node))) {
1711 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1712 if (r) break;
1713 }
1714 r = compile_tree(NCAR(node), reg);
1715 if (r) break;
1716 if (IS_NOT_NULL(NCDR(node))) {
1717 len = pos - (reg->used + SIZE_OP_JUMP);
1718 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1719 if (r) break;
1720 }
1721 } while (IS_NOT_NULL(node = NCDR(node)));
1722 }
1723 break;
1724
1725 case NT_STR:
1726 if (NSTRING_IS_RAW(node))
1727 r = compile_string_raw_node(NSTR(node), reg);
1728 else
1729 r = compile_string_node(node, reg);
1730 break;
1731
1732 case NT_CCLASS:
1733 r = compile_cclass_node(NCCLASS(node), reg);
1734 break;
1735
1736 case NT_CTYPE:
1737 {
1738 int op;
1739
1740 switch (NCTYPE(node)->ctype) {
1741 case ONIGENC_CTYPE_WORD:
1742 if (NCTYPE(node)->ascii_range != 0) {
1743 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1744 else op = OP_ASCII_WORD;
1745 }
1746 else {
1747 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1748 else op = OP_WORD;
1749 }
1750 break;
1751 default:
1752 return ONIGERR_TYPE_BUG;
1753 break;
1754 }
1755 r = add_opcode(reg, op);
1756 }
1757 break;
1758
1759 case NT_CANY:
1760 if (IS_MULTILINE(reg->options))
1761 r = add_opcode(reg, OP_ANYCHAR_ML);
1762 else
1763 r = add_opcode(reg, OP_ANYCHAR);
1764 break;
1765
1766 case NT_BREF:
1767 {
1768 BRefNode* br = NBREF(node);
1769
1770#ifdef USE_BACKREF_WITH_LEVEL
1771 if (IS_BACKREF_NEST_LEVEL(br)) {
1772 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1773 if (r) return r;
1774 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1775 if (r) return r;
1776 r = add_length(reg, br->nest_level);
1777 if (r) return r;
1778
1779 goto add_bacref_mems;
1780 }
1781 else
1782#endif
1783 if (br->back_num == 1) {
1784 n = br->back_static[0];
1785 if (IS_IGNORECASE(reg->options)) {
1786 r = add_opcode(reg, OP_BACKREFN_IC);
1787 if (r) return r;
1788 r = add_mem_num(reg, n);
1789 }
1790 else {
1791 switch (n) {
1792 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1793 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1794 default:
1795 r = add_opcode(reg, OP_BACKREFN);
1796 if (r) return r;
1797 r = add_mem_num(reg, n);
1798 break;
1799 }
1800 }
1801 }
1802 else {
1803 int i;
1804 int* p;
1805
[331]1806 if (IS_IGNORECASE(reg->options)) {
1807 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1808 }
1809 else {
1810 r = add_opcode(reg, OP_BACKREF_MULTI);
1811 }
[279]1812 if (r) return r;
1813
1814#ifdef USE_BACKREF_WITH_LEVEL
1815 add_bacref_mems:
1816#endif
1817 r = add_length(reg, br->back_num);
1818 if (r) return r;
1819 p = BACKREFS_P(br);
1820 for (i = br->back_num - 1; i >= 0; i--) {
1821 r = add_mem_num(reg, p[i]);
1822 if (r) return r;
1823 }
1824 }
1825 }
1826 break;
1827
1828#ifdef USE_SUBEXP_CALL
1829 case NT_CALL:
1830 r = compile_call(NCALL(node), reg);
1831 break;
1832#endif
1833
1834 case NT_QTFR:
1835 r = compile_quantifier_node(NQTFR(node), reg);
1836 break;
1837
1838 case NT_ENCLOSE:
1839 r = compile_enclose_node(NENCLOSE(node), reg);
1840 break;
1841
1842 case NT_ANCHOR:
1843 r = compile_anchor_node(NANCHOR(node), reg);
1844 break;
1845
1846 default:
1847#ifdef ONIG_DEBUG
1848 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1849#endif
1850 break;
1851 }
1852
1853 return r;
1854}
1855
1856#ifdef USE_NAMED_GROUP
1857
1858static int
1859noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1860{
1861 int r = 0;
1862 Node* node = *plink;
1863
1864 switch (NTYPE(node)) {
1865 case NT_LIST:
1866 case NT_ALT:
1867 do {
1868 r = noname_disable_map(&(NCAR(node)), map, counter);
1869 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1870 break;
1871
1872 case NT_QTFR:
1873 {
1874 Node** ptarget = &(NQTFR(node)->target);
1875 Node* old = *ptarget;
1876 r = noname_disable_map(ptarget, map, counter);
1877 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1878 onig_reduce_nested_quantifier(node, *ptarget);
1879 }
1880 }
1881 break;
1882
1883 case NT_ENCLOSE:
1884 {
1885 EncloseNode* en = NENCLOSE(node);
1886 if (en->type == ENCLOSE_MEMORY) {
1887 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1888 (*counter)++;
1889 map[en->regnum].new_val = *counter;
1890 en->regnum = *counter;
1891 }
1892 else if (en->regnum != 0) {
1893 *plink = en->target;
1894 en->target = NULL_NODE;
1895 onig_node_free(node);
1896 r = noname_disable_map(plink, map, counter);
1897 break;
1898 }
1899 }
1900 r = noname_disable_map(&(en->target), map, counter);
1901 }
1902 break;
1903
1904 case NT_ANCHOR:
[331]1905 if (NANCHOR(node)->target)
1906 r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
[279]1907 break;
1908
1909 default:
1910 break;
1911 }
1912
1913 return r;
1914}
1915
1916static int
1917renumber_node_backref(Node* node, GroupNumRemap* map)
1918{
1919 int i, pos, n, old_num;
1920 int *backs;
1921 BRefNode* bn = NBREF(node);
1922
1923 if (! IS_BACKREF_NAME_REF(bn))
1924 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1925
1926 old_num = bn->back_num;
1927 if (IS_NULL(bn->back_dynamic))
1928 backs = bn->back_static;
1929 else
1930 backs = bn->back_dynamic;
1931
1932 for (i = 0, pos = 0; i < old_num; i++) {
1933 n = map[backs[i]].new_val;
1934 if (n > 0) {
1935 backs[pos] = n;
1936 pos++;
1937 }
1938 }
1939
1940 bn->back_num = pos;
1941 return 0;
1942}
1943
1944static int
1945renumber_by_map(Node* node, GroupNumRemap* map)
1946{
1947 int r = 0;
1948
1949 switch (NTYPE(node)) {
1950 case NT_LIST:
1951 case NT_ALT:
1952 do {
1953 r = renumber_by_map(NCAR(node), map);
1954 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1955 break;
1956 case NT_QTFR:
1957 r = renumber_by_map(NQTFR(node)->target, map);
1958 break;
1959 case NT_ENCLOSE:
1960 {
1961 EncloseNode* en = NENCLOSE(node);
1962 if (en->type == ENCLOSE_CONDITION)
[331]1963 en->regnum = map[en->regnum].new_val;
[279]1964 r = renumber_by_map(en->target, map);
1965 }
1966 break;
1967
1968 case NT_BREF:
1969 r = renumber_node_backref(node, map);
1970 break;
1971
1972 case NT_ANCHOR:
[331]1973 if (NANCHOR(node)->target)
1974 r = renumber_by_map(NANCHOR(node)->target, map);
[279]1975 break;
1976
1977 default:
1978 break;
1979 }
1980
1981 return r;
1982}
1983
1984static int
1985numbered_ref_check(Node* node)
1986{
1987 int r = 0;
1988
1989 switch (NTYPE(node)) {
1990 case NT_LIST:
1991 case NT_ALT:
1992 do {
1993 r = numbered_ref_check(NCAR(node));
1994 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1995 break;
1996 case NT_QTFR:
1997 r = numbered_ref_check(NQTFR(node)->target);
1998 break;
1999 case NT_ENCLOSE:
2000 r = numbered_ref_check(NENCLOSE(node)->target);
2001 break;
2002
2003 case NT_BREF:
2004 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2005 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2006 break;
2007
[331]2008 case NT_ANCHOR:
2009 if (NANCHOR(node)->target)
2010 r = numbered_ref_check(NANCHOR(node)->target);
2011 break;
2012
[279]2013 default:
2014 break;
2015 }
2016
2017 return r;
2018}
2019
2020static int
[331]2021disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
[279]2022{
2023 int r, i, pos, counter;
2024 BitStatusType loc;
[331]2025 GroupNumRemap* map;
[279]2026
[331]2027 map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
[279]2028 CHECK_NULL_RETURN_MEMERR(map);
2029 for (i = 1; i <= env->num_mem; i++) {
2030 map[i].new_val = 0;
2031 }
2032 counter = 0;
2033 r = noname_disable_map(root, map, &counter);
2034 if (r != 0) return r;
2035
2036 r = renumber_by_map(*root, map);
2037 if (r != 0) return r;
2038
2039 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2040 if (map[i].new_val > 0) {
2041 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2042 pos++;
2043 }
2044 }
2045
2046 loc = env->capture_history;
2047 BIT_STATUS_CLEAR(env->capture_history);
2048 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2049 if (BIT_STATUS_AT(loc, i)) {
2050 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2051 }
2052 }
2053
2054 env->num_mem = env->num_named;
2055 reg->num_mem = env->num_named;
2056
2057 return onig_renumber_name_table(reg, map);
2058}
2059#endif /* USE_NAMED_GROUP */
2060
2061#ifdef USE_SUBEXP_CALL
2062static int
2063unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2064{
2065 int i, offset;
2066 EncloseNode* en;
2067 AbsAddrType addr;
2068
2069 for (i = 0; i < uslist->num; i++) {
2070 en = NENCLOSE(uslist->us[i].target);
2071 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2072 addr = en->call_addr;
2073 offset = uslist->us[i].offset;
2074
2075 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2076 }
2077 return 0;
2078}
2079#endif
2080
2081#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2082static int
2083quantifiers_memory_node_info(Node* node)
2084{
2085 int r = 0;
2086
2087 switch (NTYPE(node)) {
2088 case NT_LIST:
2089 case NT_ALT:
2090 {
2091 int v;
2092 do {
2093 v = quantifiers_memory_node_info(NCAR(node));
2094 if (v > r) r = v;
2095 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2096 }
2097 break;
2098
[331]2099# ifdef USE_SUBEXP_CALL
[279]2100 case NT_CALL:
2101 if (IS_CALL_RECURSION(NCALL(node))) {
2102 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2103 }
2104 else
2105 r = quantifiers_memory_node_info(NCALL(node)->target);
2106 break;
[331]2107# endif
[279]2108
2109 case NT_QTFR:
2110 {
2111 QtfrNode* qn = NQTFR(node);
2112 if (qn->upper != 0) {
2113 r = quantifiers_memory_node_info(qn->target);
2114 }
2115 }
2116 break;
2117
2118 case NT_ENCLOSE:
2119 {
2120 EncloseNode* en = NENCLOSE(node);
2121 switch (en->type) {
2122 case ENCLOSE_MEMORY:
2123 return NQ_TARGET_IS_EMPTY_MEM;
2124 break;
2125
2126 case ENCLOSE_OPTION:
2127 case ENCLOSE_STOP_BACKTRACK:
2128 case ENCLOSE_CONDITION:
[331]2129 case ENCLOSE_ABSENT:
[279]2130 r = quantifiers_memory_node_info(en->target);
2131 break;
2132 default:
2133 break;
2134 }
2135 }
2136 break;
2137
2138 case NT_BREF:
2139 case NT_STR:
2140 case NT_CTYPE:
2141 case NT_CCLASS:
2142 case NT_CANY:
2143 case NT_ANCHOR:
2144 default:
2145 break;
2146 }
2147
2148 return r;
2149}
2150#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2151
2152static int
2153get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2154{
2155 OnigDistance tmin;
2156 int r = 0;
2157
2158 *min = 0;
2159 switch (NTYPE(node)) {
2160 case NT_BREF:
2161 {
2162 int i;
2163 int* backs;
2164 Node** nodes = SCANENV_MEM_NODES(env);
2165 BRefNode* br = NBREF(node);
2166 if (br->state & NST_RECURSION) break;
2167
2168 backs = BACKREFS_P(br);
2169 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2170 r = get_min_match_length(nodes[backs[0]], min, env);
2171 if (r != 0) break;
2172 for (i = 1; i < br->back_num; i++) {
2173 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2174 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2175 if (r != 0) break;
2176 if (*min > tmin) *min = tmin;
2177 }
2178 }
2179 break;
2180
2181#ifdef USE_SUBEXP_CALL
2182 case NT_CALL:
2183 if (IS_CALL_RECURSION(NCALL(node))) {
2184 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2185 if (IS_ENCLOSE_MIN_FIXED(en))
2186 *min = en->min_len;
2187 }
2188 else
2189 r = get_min_match_length(NCALL(node)->target, min, env);
2190 break;
2191#endif
2192
2193 case NT_LIST:
2194 do {
2195 r = get_min_match_length(NCAR(node), &tmin, env);
2196 if (r == 0) *min += tmin;
2197 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2198 break;
2199
2200 case NT_ALT:
2201 {
2202 Node *x, *y;
2203 y = node;
2204 do {
2205 x = NCAR(y);
2206 r = get_min_match_length(x, &tmin, env);
2207 if (r != 0) break;
2208 if (y == node) *min = tmin;
2209 else if (*min > tmin) *min = tmin;
2210 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2211 }
2212 break;
2213
2214 case NT_STR:
2215 {
2216 StrNode* sn = NSTR(node);
2217 *min = sn->end - sn->s;
2218 }
2219 break;
2220
2221 case NT_CTYPE:
2222 *min = 1;
2223 break;
2224
2225 case NT_CCLASS:
2226 case NT_CANY:
2227 *min = 1;
2228 break;
2229
2230 case NT_QTFR:
2231 {
2232 QtfrNode* qn = NQTFR(node);
2233
2234 if (qn->lower > 0) {
2235 r = get_min_match_length(qn->target, min, env);
2236 if (r == 0)
2237 *min = distance_multiply(*min, qn->lower);
2238 }
2239 }
2240 break;
2241
2242 case NT_ENCLOSE:
2243 {
2244 EncloseNode* en = NENCLOSE(node);
2245 switch (en->type) {
2246 case ENCLOSE_MEMORY:
[331]2247 if (IS_ENCLOSE_MIN_FIXED(en))
2248 *min = en->min_len;
2249 else {
2250 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2251 *min = 0; /* recursive */
2252 else {
2253 SET_ENCLOSE_STATUS(node, NST_MARK1);
2254 r = get_min_match_length(en->target, min, env);
2255 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2256 if (r == 0) {
2257 en->min_len = *min;
2258 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2259 }
[279]2260 }
[331]2261 }
2262 break;
2263
[279]2264 case ENCLOSE_OPTION:
2265 case ENCLOSE_STOP_BACKTRACK:
2266 case ENCLOSE_CONDITION:
2267 r = get_min_match_length(en->target, min, env);
2268 break;
[331]2269
2270 case ENCLOSE_ABSENT:
2271 break;
[279]2272 }
2273 }
2274 break;
2275
2276 case NT_ANCHOR:
2277 default:
2278 break;
2279 }
2280
2281 return r;
2282}
2283
2284static int
2285get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2286{
2287 OnigDistance tmax;
2288 int r = 0;
2289
2290 *max = 0;
2291 switch (NTYPE(node)) {
2292 case NT_LIST:
2293 do {
2294 r = get_max_match_length(NCAR(node), &tmax, env);
2295 if (r == 0)
2296 *max = distance_add(*max, tmax);
2297 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2298 break;
2299
2300 case NT_ALT:
2301 do {
2302 r = get_max_match_length(NCAR(node), &tmax, env);
2303 if (r == 0 && *max < tmax) *max = tmax;
2304 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2305 break;
2306
2307 case NT_STR:
2308 {
2309 StrNode* sn = NSTR(node);
2310 *max = sn->end - sn->s;
2311 }
2312 break;
2313
2314 case NT_CTYPE:
2315 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2316 break;
2317
2318 case NT_CCLASS:
2319 case NT_CANY:
2320 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2321 break;
2322
2323 case NT_BREF:
2324 {
2325 int i;
2326 int* backs;
2327 Node** nodes = SCANENV_MEM_NODES(env);
2328 BRefNode* br = NBREF(node);
2329 if (br->state & NST_RECURSION) {
2330 *max = ONIG_INFINITE_DISTANCE;
2331 break;
2332 }
2333 backs = BACKREFS_P(br);
2334 for (i = 0; i < br->back_num; i++) {
2335 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2336 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2337 if (r != 0) break;
2338 if (*max < tmax) *max = tmax;
2339 }
2340 }
2341 break;
2342
2343#ifdef USE_SUBEXP_CALL
2344 case NT_CALL:
2345 if (! IS_CALL_RECURSION(NCALL(node)))
2346 r = get_max_match_length(NCALL(node)->target, max, env);
2347 else
2348 *max = ONIG_INFINITE_DISTANCE;
2349 break;
2350#endif
2351
2352 case NT_QTFR:
2353 {
2354 QtfrNode* qn = NQTFR(node);
2355
2356 if (qn->upper != 0) {
2357 r = get_max_match_length(qn->target, max, env);
2358 if (r == 0 && *max != 0) {
2359 if (! IS_REPEAT_INFINITE(qn->upper))
2360 *max = distance_multiply(*max, qn->upper);
2361 else
2362 *max = ONIG_INFINITE_DISTANCE;
2363 }
2364 }
2365 }
2366 break;
2367
2368 case NT_ENCLOSE:
2369 {
2370 EncloseNode* en = NENCLOSE(node);
2371 switch (en->type) {
2372 case ENCLOSE_MEMORY:
2373 if (IS_ENCLOSE_MAX_FIXED(en))
2374 *max = en->max_len;
2375 else {
[331]2376 if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2377 *max = ONIG_INFINITE_DISTANCE;
2378 else {
2379 SET_ENCLOSE_STATUS(node, NST_MARK1);
2380 r = get_max_match_length(en->target, max, env);
2381 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2382 if (r == 0) {
2383 en->max_len = *max;
2384 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2385 }
[279]2386 }
2387 }
2388 break;
[331]2389
[279]2390 case ENCLOSE_OPTION:
2391 case ENCLOSE_STOP_BACKTRACK:
2392 case ENCLOSE_CONDITION:
2393 r = get_max_match_length(en->target, max, env);
2394 break;
[331]2395
2396 case ENCLOSE_ABSENT:
2397 break;
[279]2398 }
2399 }
2400 break;
2401
2402 case NT_ANCHOR:
2403 default:
2404 break;
2405 }
2406
2407 return r;
2408}
2409
2410#define GET_CHAR_LEN_VARLEN -1
2411#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2412
2413/* fixed size pattern node only */
2414static int
2415get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2416{
2417 int tlen;
2418 int r = 0;
2419
2420 level++;
2421 *len = 0;
2422 switch (NTYPE(node)) {
2423 case NT_LIST:
2424 do {
2425 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2426 if (r == 0)
2427 *len = (int )distance_add(*len, tlen);
2428 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2429 break;
2430
2431 case NT_ALT:
2432 {
2433 int tlen2;
2434 int varlen = 0;
2435
2436 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2437 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2438 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2439 if (r == 0) {
2440 if (tlen != tlen2)
2441 varlen = 1;
2442 }
2443 }
2444 if (r == 0) {
2445 if (varlen != 0) {
2446 if (level == 1)
2447 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2448 else
2449 r = GET_CHAR_LEN_VARLEN;
2450 }
2451 else
2452 *len = tlen;
2453 }
2454 }
2455 break;
2456
2457 case NT_STR:
2458 {
2459 StrNode* sn = NSTR(node);
2460 UChar *s = sn->s;
2461 while (s < sn->end) {
[331]2462 s += enclen(reg->enc, s, sn->end);
[279]2463 (*len)++;
2464 }
2465 }
2466 break;
2467
2468 case NT_QTFR:
2469 {
2470 QtfrNode* qn = NQTFR(node);
2471 if (qn->lower == qn->upper) {
2472 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2473 if (r == 0)
2474 *len = (int )distance_multiply(tlen, qn->lower);
2475 }
2476 else
2477 r = GET_CHAR_LEN_VARLEN;
2478 }
2479 break;
2480
2481#ifdef USE_SUBEXP_CALL
2482 case NT_CALL:
2483 if (! IS_CALL_RECURSION(NCALL(node)))
2484 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2485 else
2486 r = GET_CHAR_LEN_VARLEN;
2487 break;
2488#endif
2489
2490 case NT_CTYPE:
2491 *len = 1;
2492 break;
2493
2494 case NT_CCLASS:
2495 case NT_CANY:
2496 *len = 1;
2497 break;
2498
2499 case NT_ENCLOSE:
2500 {
2501 EncloseNode* en = NENCLOSE(node);
2502 switch (en->type) {
2503 case ENCLOSE_MEMORY:
2504#ifdef USE_SUBEXP_CALL
2505 if (IS_ENCLOSE_CLEN_FIXED(en))
2506 *len = en->char_len;
2507 else {
2508 r = get_char_length_tree1(en->target, reg, len, level);
2509 if (r == 0) {
2510 en->char_len = *len;
2511 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2512 }
2513 }
2514 break;
2515#endif
2516 case ENCLOSE_OPTION:
2517 case ENCLOSE_STOP_BACKTRACK:
2518 case ENCLOSE_CONDITION:
2519 r = get_char_length_tree1(en->target, reg, len, level);
2520 break;
[331]2521 case ENCLOSE_ABSENT:
[279]2522 default:
2523 break;
2524 }
2525 }
2526 break;
2527
2528 case NT_ANCHOR:
2529 break;
2530
2531 default:
2532 r = GET_CHAR_LEN_VARLEN;
2533 break;
2534 }
2535
2536 return r;
2537}
2538
2539static int
2540get_char_length_tree(Node* node, regex_t* reg, int* len)
2541{
2542 return get_char_length_tree1(node, reg, len, 0);
2543}
2544
2545/* x is not included y ==> 1 : 0 */
2546static int
2547is_not_included(Node* x, Node* y, regex_t* reg)
2548{
2549 int i;
2550 OnigDistance len;
2551 OnigCodePoint code;
2552 UChar *p;
2553 int ytype;
2554
2555 retry:
2556 ytype = NTYPE(y);
2557 switch (NTYPE(x)) {
2558 case NT_CTYPE:
2559 {
2560 switch (ytype) {
2561 case NT_CTYPE:
2562 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2563 NCTYPE(y)->not != NCTYPE(x)->not &&
2564 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2565 return 1;
2566 else
2567 return 0;
2568 break;
2569
2570 case NT_CCLASS:
2571 swap:
2572 {
2573 Node* tmp;
2574 tmp = x; x = y; y = tmp;
2575 goto retry;
2576 }
2577 break;
2578
2579 case NT_STR:
2580 goto swap;
2581 break;
2582
2583 default:
2584 break;
2585 }
2586 }
2587 break;
2588
2589 case NT_CCLASS:
2590 {
2591 CClassNode* xc = NCCLASS(x);
2592 switch (ytype) {
2593 case NT_CTYPE:
2594 switch (NCTYPE(y)->ctype) {
2595 case ONIGENC_CTYPE_WORD:
2596 if (NCTYPE(y)->not == 0) {
2597 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2598 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2599 if (BITSET_AT(xc->bs, i)) {
2600 if (NCTYPE(y)->ascii_range) {
2601 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2602 }
2603 else {
2604 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2605 }
2606 }
2607 }
2608 return 1;
2609 }
2610 return 0;
2611 }
2612 else {
2613 if (IS_NOT_NULL(xc->mbuf)) return 0;
2614 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2615 int is_word;
2616 if (NCTYPE(y)->ascii_range)
2617 is_word = IS_CODE_SB_WORD(reg->enc, i);
2618 else
2619 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2620 if (! is_word) {
2621 if (!IS_NCCLASS_NOT(xc)) {
2622 if (BITSET_AT(xc->bs, i))
2623 return 0;
2624 }
2625 else {
2626 if (! BITSET_AT(xc->bs, i))
2627 return 0;
2628 }
2629 }
2630 }
2631 return 1;
2632 }
2633 break;
2634
2635 default:
2636 break;
2637 }
2638 break;
2639
2640 case NT_CCLASS:
2641 {
2642 int v;
2643 CClassNode* yc = NCCLASS(y);
2644
2645 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2646 v = BITSET_AT(xc->bs, i);
2647 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
[331]2648 (v == 0 && IS_NCCLASS_NOT(xc))) {
[279]2649 v = BITSET_AT(yc->bs, i);
2650 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
[331]2651 (v == 0 && IS_NCCLASS_NOT(yc)))
[279]2652 return 0;
2653 }
2654 }
2655 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2656 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2657 return 1;
2658 return 0;
2659 }
2660 break;
2661
2662 case NT_STR:
2663 goto swap;
2664 break;
2665
2666 default:
2667 break;
2668 }
2669 }
2670 break;
2671
2672 case NT_STR:
2673 {
2674 StrNode* xs = NSTR(x);
2675 if (NSTRING_LEN(x) == 0)
2676 break;
2677
2678 switch (ytype) {
2679 case NT_CTYPE:
2680 switch (NCTYPE(y)->ctype) {
2681 case ONIGENC_CTYPE_WORD:
2682 if (NCTYPE(y)->ascii_range) {
2683 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2684 return NCTYPE(y)->not;
2685 else
2686 return !(NCTYPE(y)->not);
2687 }
2688 else {
2689 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2690 return NCTYPE(y)->not;
2691 else
2692 return !(NCTYPE(y)->not);
2693 }
2694 break;
2695 default:
2696 break;
2697 }
2698 break;
2699
2700 case NT_CCLASS:
[331]2701 {
2702 CClassNode* cc = NCCLASS(y);
[279]2703
[331]2704 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2705 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2706 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2707 }
2708 break;
[279]2709
2710 case NT_STR:
[331]2711 {
2712 UChar *q;
2713 StrNode* ys = NSTR(y);
2714 len = NSTRING_LEN(x);
2715 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2716 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2717 /* tiny version */
2718 return 0;
[279]2719 }
2720 else {
2721 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2722 if (*p != *q) return 1;
2723 }
2724 }
2725 }
2726 break;
2727
2728 default:
[331]2729 break;
[279]2730 }
2731 }
2732 break;
2733
2734 default:
2735 break;
2736 }
2737
2738 return 0;
2739}
2740
2741static Node*
2742get_head_value_node(Node* node, int exact, regex_t* reg)
2743{
2744 Node* n = NULL_NODE;
2745
2746 switch (NTYPE(node)) {
2747 case NT_BREF:
2748 case NT_ALT:
2749 case NT_CANY:
2750#ifdef USE_SUBEXP_CALL
2751 case NT_CALL:
2752#endif
2753 break;
2754
2755 case NT_CTYPE:
2756 case NT_CCLASS:
2757 if (exact == 0) {
2758 n = node;
2759 }
2760 break;
2761
2762 case NT_LIST:
2763 n = get_head_value_node(NCAR(node), exact, reg);
2764 break;
2765
2766 case NT_STR:
2767 {
2768 StrNode* sn = NSTR(node);
2769
2770 if (sn->end <= sn->s)
2771 break;
2772
2773 if (exact != 0 &&
2774 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2775 }
2776 else {
2777 n = node;
2778 }
2779 }
2780 break;
2781
2782 case NT_QTFR:
2783 {
2784 QtfrNode* qn = NQTFR(node);
2785 if (qn->lower > 0) {
[331]2786#ifdef USE_OP_PUSH_OR_JUMP_EXACT
[279]2787 if (IS_NOT_NULL(qn->head_exact))
2788 n = qn->head_exact;
2789 else
[331]2790#endif
[279]2791 n = get_head_value_node(qn->target, exact, reg);
2792 }
2793 }
2794 break;
2795
2796 case NT_ENCLOSE:
2797 {
2798 EncloseNode* en = NENCLOSE(node);
2799 switch (en->type) {
2800 case ENCLOSE_OPTION:
2801 {
2802 OnigOptionType options = reg->options;
2803
2804 reg->options = NENCLOSE(node)->option;
2805 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2806 reg->options = options;
2807 }
2808 break;
2809
2810 case ENCLOSE_MEMORY:
2811 case ENCLOSE_STOP_BACKTRACK:
2812 case ENCLOSE_CONDITION:
2813 n = get_head_value_node(en->target, exact, reg);
2814 break;
[331]2815
2816 case ENCLOSE_ABSENT:
2817 break;
[279]2818 }
2819 }
2820 break;
2821
2822 case NT_ANCHOR:
2823 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2824 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2825 break;
2826
2827 default:
2828 break;
2829 }
2830
2831 return n;
2832}
2833
2834static int
2835check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2836{
2837 int type, r = 0;
2838
2839 type = NTYPE(node);
2840 if ((NTYPE2BIT(type) & type_mask) == 0)
2841 return 1;
2842
2843 switch (type) {
2844 case NT_LIST:
2845 case NT_ALT:
2846 do {
2847 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2848 anchor_mask);
2849 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2850 break;
2851
2852 case NT_QTFR:
2853 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2854 anchor_mask);
2855 break;
2856
2857 case NT_ENCLOSE:
2858 {
2859 EncloseNode* en = NENCLOSE(node);
2860 if ((en->type & enclose_mask) == 0)
2861 return 1;
2862
2863 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2864 }
2865 break;
2866
2867 case NT_ANCHOR:
2868 type = NANCHOR(node)->type;
2869 if ((type & anchor_mask) == 0)
2870 return 1;
2871
2872 if (NANCHOR(node)->target)
2873 r = check_type_tree(NANCHOR(node)->target,
2874 type_mask, enclose_mask, anchor_mask);
2875 break;
2876
2877 default:
2878 break;
2879 }
2880 return r;
2881}
2882
2883#ifdef USE_SUBEXP_CALL
2884
[331]2885# define RECURSION_EXIST 1
2886# define RECURSION_INFINITE 2
[279]2887
2888static int
2889subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2890{
2891 int type;
2892 int r = 0;
2893
2894 type = NTYPE(node);
2895 switch (type) {
2896 case NT_LIST:
2897 {
2898 Node *x;
2899 OnigDistance min;
2900 int ret;
2901
2902 x = node;
2903 do {
2904 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2905 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2906 r |= ret;
2907 if (head) {
2908 ret = get_min_match_length(NCAR(x), &min, env);
2909 if (ret != 0) return ret;
2910 if (min != 0) head = 0;
2911 }
2912 } while (IS_NOT_NULL(x = NCDR(x)));
2913 }
2914 break;
2915
2916 case NT_ALT:
2917 {
2918 int ret;
2919 r = RECURSION_EXIST;
2920 do {
2921 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2922 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2923 r &= ret;
2924 } while (IS_NOT_NULL(node = NCDR(node)));
2925 }
2926 break;
2927
2928 case NT_QTFR:
2929 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2930 if (r == RECURSION_EXIST) {
2931 if (NQTFR(node)->lower == 0) r = 0;
2932 }
2933 break;
2934
2935 case NT_ANCHOR:
2936 {
2937 AnchorNode* an = NANCHOR(node);
2938 switch (an->type) {
2939 case ANCHOR_PREC_READ:
2940 case ANCHOR_PREC_READ_NOT:
2941 case ANCHOR_LOOK_BEHIND:
2942 case ANCHOR_LOOK_BEHIND_NOT:
2943 r = subexp_inf_recursive_check(an->target, env, head);
2944 break;
2945 }
2946 }
2947 break;
2948
2949 case NT_CALL:
2950 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2951 break;
2952
2953 case NT_ENCLOSE:
2954 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2955 return 0;
2956 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2957 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2958 else {
2959 SET_ENCLOSE_STATUS(node, NST_MARK2);
2960 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2961 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2962 }
2963 break;
2964
2965 default:
2966 break;
2967 }
2968
2969 return r;
2970}
2971
2972static int
2973subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2974{
2975 int type;
2976 int r = 0;
2977
2978 type = NTYPE(node);
2979 switch (type) {
2980 case NT_LIST:
2981 case NT_ALT:
2982 do {
2983 r = subexp_inf_recursive_check_trav(NCAR(node), env);
2984 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2985 break;
2986
2987 case NT_QTFR:
2988 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2989 break;
2990
2991 case NT_ANCHOR:
2992 {
2993 AnchorNode* an = NANCHOR(node);
2994 switch (an->type) {
2995 case ANCHOR_PREC_READ:
2996 case ANCHOR_PREC_READ_NOT:
2997 case ANCHOR_LOOK_BEHIND:
2998 case ANCHOR_LOOK_BEHIND_NOT:
2999 r = subexp_inf_recursive_check_trav(an->target, env);
3000 break;
3001 }
3002 }
3003 break;
3004
3005 case NT_ENCLOSE:
3006 {
3007 EncloseNode* en = NENCLOSE(node);
3008
3009 if (IS_ENCLOSE_RECURSION(en)) {
3010 SET_ENCLOSE_STATUS(node, NST_MARK1);
3011 r = subexp_inf_recursive_check(en->target, env, 1);
3012 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
3013 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3014 }
3015 r = subexp_inf_recursive_check_trav(en->target, env);
3016 }
3017
3018 break;
3019
3020 default:
3021 break;
3022 }
3023
3024 return r;
3025}
3026
3027static int
3028subexp_recursive_check(Node* node)
3029{
3030 int r = 0;
3031
3032 switch (NTYPE(node)) {
3033 case NT_LIST:
3034 case NT_ALT:
3035 do {
3036 r |= subexp_recursive_check(NCAR(node));
3037 } while (IS_NOT_NULL(node = NCDR(node)));
3038 break;
3039
3040 case NT_QTFR:
3041 r = subexp_recursive_check(NQTFR(node)->target);
3042 break;
3043
3044 case NT_ANCHOR:
3045 {
3046 AnchorNode* an = NANCHOR(node);
3047 switch (an->type) {
3048 case ANCHOR_PREC_READ:
3049 case ANCHOR_PREC_READ_NOT:
3050 case ANCHOR_LOOK_BEHIND:
3051 case ANCHOR_LOOK_BEHIND_NOT:
3052 r = subexp_recursive_check(an->target);
3053 break;
3054 }
3055 }
3056 break;
3057
3058 case NT_CALL:
3059 r = subexp_recursive_check(NCALL(node)->target);
3060 if (r != 0) SET_CALL_RECURSION(node);
3061 break;
3062
3063 case NT_ENCLOSE:
3064 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3065 return 0;
3066 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3067 return 1; /* recursion */
3068 else {
3069 SET_ENCLOSE_STATUS(node, NST_MARK2);
3070 r = subexp_recursive_check(NENCLOSE(node)->target);
3071 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3072 }
3073 break;
3074
3075 default:
3076 break;
3077 }
3078
3079 return r;
3080}
3081
3082
3083static int
3084subexp_recursive_check_trav(Node* node, ScanEnv* env)
3085{
[331]3086# define FOUND_CALLED_NODE 1
[279]3087
3088 int type;
3089 int r = 0;
3090
3091 type = NTYPE(node);
3092 switch (type) {
3093 case NT_LIST:
3094 case NT_ALT:
3095 {
3096 int ret;
3097 do {
3098 ret = subexp_recursive_check_trav(NCAR(node), env);
3099 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3100 else if (ret < 0) return ret;
3101 } while (IS_NOT_NULL(node = NCDR(node)));
3102 }
3103 break;
3104
3105 case NT_QTFR:
3106 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3107 if (NQTFR(node)->upper == 0) {
3108 if (r == FOUND_CALLED_NODE)
3109 NQTFR(node)->is_refered = 1;
3110 }
3111 break;
3112
3113 case NT_ANCHOR:
3114 {
3115 AnchorNode* an = NANCHOR(node);
3116 switch (an->type) {
3117 case ANCHOR_PREC_READ:
3118 case ANCHOR_PREC_READ_NOT:
3119 case ANCHOR_LOOK_BEHIND:
3120 case ANCHOR_LOOK_BEHIND_NOT:
3121 r = subexp_recursive_check_trav(an->target, env);
3122 break;
3123 }
3124 }
3125 break;
3126
3127 case NT_ENCLOSE:
3128 {
3129 EncloseNode* en = NENCLOSE(node);
3130
3131 if (! IS_ENCLOSE_RECURSION(en)) {
3132 if (IS_ENCLOSE_CALLED(en)) {
3133 SET_ENCLOSE_STATUS(node, NST_MARK1);
3134 r = subexp_recursive_check(en->target);
3135 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3136 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3137 }
3138 }
3139 r = subexp_recursive_check_trav(en->target, env);
3140 if (IS_ENCLOSE_CALLED(en))
3141 r |= FOUND_CALLED_NODE;
3142 }
3143 break;
3144
3145 default:
3146 break;
3147 }
3148
3149 return r;
3150}
3151
3152static int
3153setup_subexp_call(Node* node, ScanEnv* env)
3154{
3155 int type;
3156 int r = 0;
3157
3158 type = NTYPE(node);
3159 switch (type) {
3160 case NT_LIST:
3161 do {
3162 r = setup_subexp_call(NCAR(node), env);
3163 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3164 break;
3165
3166 case NT_ALT:
3167 do {
3168 r = setup_subexp_call(NCAR(node), env);
3169 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3170 break;
3171
3172 case NT_QTFR:
3173 r = setup_subexp_call(NQTFR(node)->target, env);
3174 break;
3175 case NT_ENCLOSE:
3176 r = setup_subexp_call(NENCLOSE(node)->target, env);
3177 break;
3178
3179 case NT_CALL:
3180 {
3181 CallNode* cn = NCALL(node);
3182 Node** nodes = SCANENV_MEM_NODES(env);
3183
3184 if (cn->group_num != 0) {
3185 int gnum = cn->group_num;
3186
[331]3187# ifdef USE_NAMED_GROUP
[279]3188 if (env->num_named > 0 &&
3189 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3190 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3191 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3192 }
[331]3193# endif
[279]3194 if (gnum > env->num_mem) {
3195 onig_scan_env_set_error_string(env,
3196 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3197 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3198 }
3199
[331]3200# ifdef USE_NAMED_GROUP
[279]3201 set_call_attr:
[331]3202# endif
[279]3203 cn->target = nodes[cn->group_num];
3204 if (IS_NULL(cn->target)) {
3205 onig_scan_env_set_error_string(env,
3206 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3207 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3208 }
3209 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3210 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3211 cn->unset_addr_list = env->unset_addr_list;
3212 }
[331]3213# ifdef USE_NAMED_GROUP
3214# ifdef USE_PERL_SUBEXP_CALL
[279]3215 else if (cn->name == cn->name_end) {
3216 goto set_call_attr;
3217 }
[331]3218# endif
[279]3219 else {
3220 int *refs;
3221
3222 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3223 &refs);
3224 if (n <= 0) {
3225 onig_scan_env_set_error_string(env,
3226 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3227 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3228 }
3229 else if (n > 1 &&
3230 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3231 onig_scan_env_set_error_string(env,
3232 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3233 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3234 }
3235 else {
3236 cn->group_num = refs[0];
3237 goto set_call_attr;
3238 }
3239 }
[331]3240# endif
[279]3241 }
3242 break;
3243
3244 case NT_ANCHOR:
3245 {
3246 AnchorNode* an = NANCHOR(node);
3247
3248 switch (an->type) {
3249 case ANCHOR_PREC_READ:
3250 case ANCHOR_PREC_READ_NOT:
3251 case ANCHOR_LOOK_BEHIND:
3252 case ANCHOR_LOOK_BEHIND_NOT:
3253 r = setup_subexp_call(an->target, env);
3254 break;
3255 }
3256 }
3257 break;
3258
3259 default:
3260 break;
3261 }
3262
3263 return r;
3264}
3265#endif
3266
3267/* divide different length alternatives in look-behind.
3268 (?<=A|B) ==> (?<=A)|(?<=B)
3269 (?<!A|B) ==> (?<!A)(?<!B)
3270*/
3271static int
3272divide_look_behind_alternatives(Node* node)
3273{
3274 Node *head, *np, *insert_node;
3275 AnchorNode* an = NANCHOR(node);
3276 int anc_type = an->type;
3277
3278 head = an->target;
3279 np = NCAR(head);
3280 swap_node(node, head);
3281 NCAR(node) = head;
3282 NANCHOR(head)->target = np;
3283
3284 np = node;
3285 while ((np = NCDR(np)) != NULL_NODE) {
3286 insert_node = onig_node_new_anchor(anc_type);
3287 CHECK_NULL_RETURN_MEMERR(insert_node);
3288 NANCHOR(insert_node)->target = NCAR(np);
3289 NCAR(np) = insert_node;
3290 }
3291
3292 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3293 np = node;
3294 do {
3295 SET_NTYPE(np, NT_LIST); /* alt -> list */
3296 } while ((np = NCDR(np)) != NULL_NODE);
3297 }
3298 return 0;
3299}
3300
3301static int
3302setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3303{
3304 int r, len;
3305 AnchorNode* an = NANCHOR(node);
3306
3307 r = get_char_length_tree(an->target, reg, &len);
3308 if (r == 0)
3309 an->char_len = len;
3310 else if (r == GET_CHAR_LEN_VARLEN)
3311 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3312 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3313 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3314 r = divide_look_behind_alternatives(node);
3315 else
3316 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3317 }
3318
3319 return r;
3320}
3321
3322static int
[331]3323next_setup(Node* node, Node* next_node, regex_t* reg)
[279]3324{
3325 int type;
3326
3327 retry:
3328 type = NTYPE(node);
3329 if (type == NT_QTFR) {
3330 QtfrNode* qn = NQTFR(node);
3331 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3332#ifdef USE_QTFR_PEEK_NEXT
3333 Node* n = get_head_value_node(next_node, 1, reg);
3334 /* '\0': for UTF-16BE etc... */
3335 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3336 qn->next_head_exact = n;
3337 }
3338#endif
3339 /* automatic possessification a*b ==> (?>a*)b */
3340 if (qn->lower <= 1) {
3341 int ttype = NTYPE(qn->target);
3342 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3343 Node *x, *y;
3344 x = get_head_value_node(qn->target, 0, reg);
3345 if (IS_NOT_NULL(x)) {
3346 y = get_head_value_node(next_node, 0, reg);
3347 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3348 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3349 CHECK_NULL_RETURN_MEMERR(en);
3350 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3351 swap_node(node, en);
3352 NENCLOSE(node)->target = en;
3353 }
3354 }
3355 }
3356 }
3357 }
3358 }
3359 else if (type == NT_ENCLOSE) {
3360 EncloseNode* en = NENCLOSE(node);
3361 if (en->type == ENCLOSE_MEMORY) {
3362 node = en->target;
3363 goto retry;
3364 }
3365 }
3366 return 0;
3367}
3368
3369
3370static int
3371update_string_node_case_fold(regex_t* reg, Node *node)
3372{
3373 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3374 UChar *sbuf, *ebuf, *sp;
3375 int r, i, len;
3376 OnigDistance sbuf_size;
3377 StrNode* sn = NSTR(node);
3378
3379 end = sn->end;
3380 sbuf_size = (end - sn->s) * 2;
3381 sbuf = (UChar* )xmalloc(sbuf_size);
3382 CHECK_NULL_RETURN_MEMERR(sbuf);
3383 ebuf = sbuf + sbuf_size;
3384
3385 sp = sbuf;
3386 p = sn->s;
3387 while (p < end) {
3388 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3389 for (i = 0; i < len; i++) {
3390 if (sp >= ebuf) {
3391 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3392 if (IS_NULL(p)) {
3393 xfree(sbuf);
3394 return ONIGERR_MEMORY;
3395 }
3396 sbuf = p;
3397 sp = sbuf + sbuf_size;
3398 sbuf_size *= 2;
3399 ebuf = sbuf + sbuf_size;
3400 }
3401
3402 *sp++ = buf[i];
3403 }
3404 }
3405
3406 r = onig_node_str_set(node, sbuf, sp);
3407
3408 xfree(sbuf);
3409 return r;
3410}
3411
3412static int
3413expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3414 regex_t* reg)
3415{
3416 int r;
3417 Node *node;
3418
3419 node = onig_node_new_str(s, end);
3420 if (IS_NULL(node)) return ONIGERR_MEMORY;
3421
3422 r = update_string_node_case_fold(reg, node);
3423 if (r != 0) {
3424 onig_node_free(node);
3425 return r;
3426 }
3427
3428 NSTRING_SET_AMBIG(node);
3429 NSTRING_SET_DONT_GET_OPT_INFO(node);
3430 *rnode = node;
3431 return 0;
3432}
3433
3434static int
3435is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3436 int slen)
3437{
3438 int i;
3439
3440 for (i = 0; i < item_num; i++) {
3441 if (items[i].byte_len != slen) {
3442 return 1;
3443 }
3444 if (items[i].code_len != 1) {
3445 return 1;
3446 }
3447 }
3448 return 0;
3449}
3450
3451static int
3452expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3453 UChar *p, int slen, UChar *end,
3454 regex_t* reg, Node **rnode)
3455{
3456 int r, i, j, len, varlen;
3457 Node *anode, *var_anode, *snode, *xnode, *an;
3458 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3459
3460 *rnode = var_anode = NULL_NODE;
3461
3462 varlen = 0;
3463 for (i = 0; i < item_num; i++) {
3464 if (items[i].byte_len != slen) {
3465 varlen = 1;
3466 break;
3467 }
3468 }
3469
3470 if (varlen != 0) {
3471 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3472 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3473
3474 xnode = onig_node_new_list(NULL, NULL);
3475 if (IS_NULL(xnode)) goto mem_err;
3476 NCAR(var_anode) = xnode;
3477
3478 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3479 if (IS_NULL(anode)) goto mem_err;
3480 NCAR(xnode) = anode;
3481 }
3482 else {
3483 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3484 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3485 }
3486
3487 snode = onig_node_new_str(p, p + slen);
3488 if (IS_NULL(snode)) goto mem_err;
3489
3490 NCAR(anode) = snode;
3491
3492 for (i = 0; i < item_num; i++) {
3493 snode = onig_node_new_str(NULL, NULL);
3494 if (IS_NULL(snode)) goto mem_err;
3495
3496 for (j = 0; j < items[i].code_len; j++) {
3497 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3498 if (len < 0) {
3499 r = len;
3500 goto mem_err2;
3501 }
3502
3503 r = onig_node_str_cat(snode, buf, buf + len);
3504 if (r != 0) goto mem_err2;
3505 }
3506
3507 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3508 if (IS_NULL(an)) {
3509 goto mem_err2;
3510 }
3511
3512 if (items[i].byte_len != slen) {
3513 Node *rem;
3514 UChar *q = p + items[i].byte_len;
3515
3516 if (q < end) {
[331]3517 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3518 if (r != 0) {
3519 onig_node_free(an);
3520 goto mem_err2;
3521 }
[279]3522
[331]3523 xnode = onig_node_list_add(NULL_NODE, snode);
3524 if (IS_NULL(xnode)) {
3525 onig_node_free(an);
3526 onig_node_free(rem);
3527 goto mem_err2;
3528 }
3529 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3530 onig_node_free(an);
3531 onig_node_free(xnode);
3532 onig_node_free(rem);
3533 goto mem_err;
3534 }
[279]3535
[331]3536 NCAR(an) = xnode;
[279]3537 }
3538 else {
[331]3539 NCAR(an) = snode;
[279]3540 }
3541
3542 NCDR(var_anode) = an;
3543 var_anode = an;
3544 }
3545 else {
3546 NCAR(an) = snode;
3547 NCDR(anode) = an;
3548 anode = an;
3549 }
3550 }
3551
3552 return varlen;
3553
3554 mem_err2:
3555 onig_node_free(snode);
3556
3557 mem_err:
3558 onig_node_free(*rnode);
3559
3560 return ONIGERR_MEMORY;
3561}
3562
3563static int
3564expand_case_fold_string(Node* node, regex_t* reg)
3565{
3566#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3567
3568 int r, n, len, alt_num;
3569 int varlen = 0;
3570 UChar *start, *end, *p;
3571 Node *top_root, *root, *snode, *prev_node;
3572 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3573 StrNode* sn = NSTR(node);
3574
3575 if (NSTRING_IS_AMBIG(node)) return 0;
3576
3577 start = sn->s;
3578 end = sn->end;
3579 if (start >= end) return 0;
3580
3581 r = 0;
3582 top_root = root = prev_node = snode = NULL_NODE;
3583 alt_num = 1;
3584 p = start;
3585 while (p < end) {
3586 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3587 p, end, items);
3588 if (n < 0) {
3589 r = n;
3590 goto err;
3591 }
3592
[331]3593 len = enclen(reg->enc, p, end);
[279]3594
3595 varlen = is_case_fold_variable_len(n, items, len);
3596 if (n == 0 || varlen == 0) {
3597 if (IS_NULL(snode)) {
3598 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3599 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3600 if (IS_NULL(root)) {
3601 onig_node_free(prev_node);
3602 goto mem_err;
3603 }
3604 }
3605
3606 prev_node = snode = onig_node_new_str(NULL, NULL);
3607 if (IS_NULL(snode)) goto mem_err;
3608 if (IS_NOT_NULL(root)) {
3609 if (IS_NULL(onig_node_list_add(root, snode))) {
3610 onig_node_free(snode);
3611 goto mem_err;
3612 }
3613 }
3614 }
3615
3616 r = onig_node_str_cat(snode, p, p + len);
3617 if (r != 0) goto err;
3618 }
3619 else {
3620 alt_num *= (n + 1);
3621 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3622
3623 if (IS_NOT_NULL(snode)) {
3624 r = update_string_node_case_fold(reg, snode);
3625 if (r == 0) {
3626 NSTRING_SET_AMBIG(snode);
3627 }
3628 }
3629 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3630 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3631 if (IS_NULL(root)) {
3632 onig_node_free(prev_node);
3633 goto mem_err;
3634 }
3635 }
3636
3637 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3638 if (r < 0) goto mem_err;
3639 if (r == 1) {
3640 if (IS_NULL(root)) {
3641 top_root = prev_node;
3642 }
3643 else {
3644 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3645 onig_node_free(prev_node);
3646 goto mem_err;
3647 }
3648 }
3649
3650 root = NCAR(prev_node);
3651 }
3652 else { /* r == 0 */
3653 if (IS_NOT_NULL(root)) {
3654 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3655 onig_node_free(prev_node);
3656 goto mem_err;
3657 }
3658 }
3659 }
3660
3661 snode = NULL_NODE;
3662 }
3663
3664 p += len;
3665 }
3666 if (IS_NOT_NULL(snode)) {
3667 r = update_string_node_case_fold(reg, snode);
3668 if (r == 0) {
3669 NSTRING_SET_AMBIG(snode);
3670 }
3671 }
3672
3673 if (p < end) {
3674 Node *srem;
3675
3676 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3677 if (r != 0) goto mem_err;
3678
3679 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3680 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3681 if (IS_NULL(root)) {
3682 onig_node_free(srem);
3683 onig_node_free(prev_node);
3684 goto mem_err;
3685 }
3686 }
3687
3688 if (IS_NULL(root)) {
3689 prev_node = srem;
3690 }
3691 else {
3692 if (IS_NULL(onig_node_list_add(root, srem))) {
3693 onig_node_free(srem);
3694 goto mem_err;
3695 }
3696 }
3697 }
3698
3699 /* ending */
3700 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3701 swap_node(node, top_root);
3702 onig_node_free(top_root);
3703 return 0;
3704
3705 mem_err:
3706 r = ONIGERR_MEMORY;
3707
3708 err:
3709 onig_node_free(top_root);
3710 return r;
3711}
3712
3713
3714#ifdef USE_COMBINATION_EXPLOSION_CHECK
3715
[331]3716# define CEC_THRES_NUM_BIG_REPEAT 512
3717# define CEC_INFINITE_NUM 0x7fffffff
[279]3718
[331]3719# define CEC_IN_INFINITE_REPEAT (1<<0)
3720# define CEC_IN_FINITE_REPEAT (1<<1)
3721# define CEC_CONT_BIG_REPEAT (1<<2)
[279]3722
3723static int
3724setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3725{
3726 int type;
3727 int r = state;
3728
3729 type = NTYPE(node);
3730 switch (type) {
3731 case NT_LIST:
3732 {
3733 Node* prev = NULL_NODE;
3734 do {
3735 r = setup_comb_exp_check(NCAR(node), r, env);
3736 prev = NCAR(node);
3737 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3738 }
3739 break;
3740
3741 case NT_ALT:
3742 {
3743 int ret;
3744 do {
3745 ret = setup_comb_exp_check(NCAR(node), state, env);
3746 r |= ret;
3747 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3748 }
3749 break;
3750
3751 case NT_QTFR:
3752 {
3753 int child_state = state;
3754 int add_state = 0;
3755 QtfrNode* qn = NQTFR(node);
3756 Node* target = qn->target;
3757 int var_num;
3758
3759 if (! IS_REPEAT_INFINITE(qn->upper)) {
3760 if (qn->upper > 1) {
3761 /* {0,1}, {1,1} are allowed */
3762 child_state |= CEC_IN_FINITE_REPEAT;
3763
3764 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3765 if (env->backrefed_mem == 0) {
3766 if (NTYPE(qn->target) == NT_ENCLOSE) {
3767 EncloseNode* en = NENCLOSE(qn->target);
3768 if (en->type == ENCLOSE_MEMORY) {
3769 if (NTYPE(en->target) == NT_QTFR) {
3770 QtfrNode* q = NQTFR(en->target);
3771 if (IS_REPEAT_INFINITE(q->upper)
3772 && q->greedy == qn->greedy) {
3773 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3774 if (qn->upper == 1)
3775 child_state = state;
3776 }
3777 }
3778 }
3779 }
3780 }
3781 }
3782 }
3783
3784 if (state & CEC_IN_FINITE_REPEAT) {
3785 qn->comb_exp_check_num = -1;
3786 }
3787 else {
3788 if (IS_REPEAT_INFINITE(qn->upper)) {
3789 var_num = CEC_INFINITE_NUM;
3790 child_state |= CEC_IN_INFINITE_REPEAT;
3791 }
3792 else {
3793 var_num = qn->upper - qn->lower;
3794 }
3795
3796 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3797 add_state |= CEC_CONT_BIG_REPEAT;
3798
3799 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3800 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3801 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3802 if (qn->comb_exp_check_num == 0) {
3803 env->num_comb_exp_check++;
3804 qn->comb_exp_check_num = env->num_comb_exp_check;
3805 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3806 env->comb_exp_max_regnum = env->curr_max_regnum;
3807 }
3808 }
3809 }
3810
3811 r = setup_comb_exp_check(target, child_state, env);
3812 r |= add_state;
3813 }
3814 break;
3815
3816 case NT_ENCLOSE:
3817 {
3818 EncloseNode* en = NENCLOSE(node);
3819
3820 switch (en->type) {
3821 case ENCLOSE_MEMORY:
3822 {
3823 if (env->curr_max_regnum < en->regnum)
3824 env->curr_max_regnum = en->regnum;
3825
3826 r = setup_comb_exp_check(en->target, state, env);
3827 }
3828 break;
3829
3830 default:
3831 r = setup_comb_exp_check(en->target, state, env);
3832 break;
3833 }
3834 }
3835 break;
3836
[331]3837# ifdef USE_SUBEXP_CALL
[279]3838 case NT_CALL:
3839 if (IS_CALL_RECURSION(NCALL(node)))
3840 env->has_recursion = 1;
3841 else
3842 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3843 break;
[331]3844# endif
[279]3845
3846 default:
3847 break;
3848 }
3849
3850 return r;
3851}
3852#endif
3853
3854#define IN_ALT (1<<0)
3855#define IN_NOT (1<<1)
3856#define IN_REPEAT (1<<2)
3857#define IN_VAR_REPEAT (1<<3)
[331]3858#define IN_CALL (1<<4)
3859#define IN_RECCALL (1<<5)
[279]3860
3861/* setup_tree does the following work.
3862 1. check empty loop. (set qn->target_empty_info)
3863 2. expand ignore-case in char class.
3864 3. set memory status bit flags. (reg->mem_stats)
3865 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3866 5. find invalid patterns in look-behind.
3867 6. expand repeated string.
3868 */
3869static int
3870setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3871{
3872 int type;
3873 int r = 0;
3874
3875restart:
3876 type = NTYPE(node);
3877 switch (type) {
3878 case NT_LIST:
3879 {
3880 Node* prev = NULL_NODE;
3881 do {
3882 r = setup_tree(NCAR(node), reg, state, env);
3883 if (IS_NOT_NULL(prev) && r == 0) {
[331]3884 r = next_setup(prev, NCAR(node), reg);
[279]3885 }
3886 prev = NCAR(node);
3887 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3888 }
3889 break;
3890
3891 case NT_ALT:
3892 do {
3893 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3894 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3895 break;
3896
3897 case NT_CCLASS:
3898 break;
3899
3900 case NT_STR:
3901 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3902 r = expand_case_fold_string(node, reg);
3903 }
3904 break;
3905
3906 case NT_CTYPE:
3907 case NT_CANY:
3908 break;
3909
3910#ifdef USE_SUBEXP_CALL
3911 case NT_CALL:
3912 break;
3913#endif
3914
3915 case NT_BREF:
3916 {
3917 int i;
3918 int* p;
3919 Node** nodes = SCANENV_MEM_NODES(env);
3920 BRefNode* br = NBREF(node);
3921 p = BACKREFS_P(br);
3922 for (i = 0; i < br->back_num; i++) {
3923 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3924 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3925 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3926#ifdef USE_BACKREF_WITH_LEVEL
3927 if (IS_BACKREF_NEST_LEVEL(br)) {
3928 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3929 }
3930#endif
3931 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3932 }
3933 }
3934 break;
3935
3936 case NT_QTFR:
3937 {
3938 OnigDistance d;
3939 QtfrNode* qn = NQTFR(node);
3940 Node* target = qn->target;
3941
3942 if ((state & IN_REPEAT) != 0) {
[331]3943 qn->state |= NST_IN_REPEAT;
[279]3944 }
3945
3946 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3947 r = get_min_match_length(target, &d, env);
3948 if (r) break;
3949 if (d == 0) {
3950 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3951#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3952 r = quantifiers_memory_node_info(target);
3953 if (r < 0) break;
3954 if (r > 0) {
3955 qn->target_empty_info = r;
3956 }
3957#endif
3958#if 0
3959 r = get_max_match_length(target, &d, env);
3960 if (r == 0 && d == 0) {
3961 /* ()* ==> ()?, ()+ ==> () */
3962 qn->upper = 1;
3963 if (qn->lower > 1) qn->lower = 1;
3964 if (NTYPE(target) == NT_STR) {
3965 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3966 }
3967 }
3968#endif
3969 }
3970 }
3971
3972 state |= IN_REPEAT;
3973 if (qn->lower != qn->upper)
3974 state |= IN_VAR_REPEAT;
3975 r = setup_tree(target, reg, state, env);
3976 if (r) break;
3977
3978 /* expand string */
3979#define EXPAND_STRING_MAX_LENGTH 100
3980 if (NTYPE(target) == NT_STR) {
3981 if (qn->lower > 1) {
3982 int i, n = qn->lower;
3983 OnigDistance len = NSTRING_LEN(target);
3984 StrNode* sn = NSTR(target);
3985 Node* np;
3986
3987 np = onig_node_new_str(sn->s, sn->end);
3988 if (IS_NULL(np)) return ONIGERR_MEMORY;
3989 NSTR(np)->flag = sn->flag;
3990
3991 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3992 r = onig_node_str_cat(np, sn->s, sn->end);
3993 if (r) {
3994 onig_node_free(np);
3995 return r;
3996 }
3997 }
3998 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
3999 Node *np1, *np2;
4000
4001 qn->lower -= i;
4002 if (! IS_REPEAT_INFINITE(qn->upper))
4003 qn->upper -= i;
4004
4005 np1 = onig_node_new_list(np, NULL);
4006 if (IS_NULL(np1)) {
4007 onig_node_free(np);
4008 return ONIGERR_MEMORY;
4009 }
4010 swap_node(np1, node);
4011 np2 = onig_node_list_add(node, np1);
4012 if (IS_NULL(np2)) {
4013 onig_node_free(np1);
4014 return ONIGERR_MEMORY;
4015 }
4016 }
4017 else {
4018 swap_node(np, node);
4019 onig_node_free(np);
4020 }
4021 break; /* break case NT_QTFR: */
4022 }
4023 }
4024
4025#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4026 if (qn->greedy && (qn->target_empty_info != 0)) {
4027 if (NTYPE(target) == NT_QTFR) {
4028 QtfrNode* tqn = NQTFR(target);
4029 if (IS_NOT_NULL(tqn->head_exact)) {
4030 qn->head_exact = tqn->head_exact;
4031 tqn->head_exact = NULL;
4032 }
4033 }
4034 else {
4035 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4036 }
4037 }
4038#endif
4039 }
4040 break;
4041
4042 case NT_ENCLOSE:
4043 {
4044 EncloseNode* en = NENCLOSE(node);
4045
4046 switch (en->type) {
4047 case ENCLOSE_OPTION:
4048 {
4049 OnigOptionType options = reg->options;
4050 reg->options = NENCLOSE(node)->option;
4051 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4052 reg->options = options;
4053 }
4054 break;
4055
4056 case ENCLOSE_MEMORY:
[331]4057 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
[279]4058 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4059 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4060 }
[331]4061 if (IS_ENCLOSE_CALLED(en))
4062 state |= IN_CALL;
4063 if (IS_ENCLOSE_RECURSION(en))
4064 state |= IN_RECCALL;
4065 else if ((state & IN_RECCALL) != 0)
4066 SET_CALL_RECURSION(node);
4067 r = setup_tree(en->target, reg, state, env);
4068 break;
[279]4069
4070 case ENCLOSE_STOP_BACKTRACK:
4071 {
4072 Node* target = en->target;
4073 r = setup_tree(target, reg, state, env);
4074 if (NTYPE(target) == NT_QTFR) {
4075 QtfrNode* tqn = NQTFR(target);
4076 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4077 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4078 int qtype = NTYPE(tqn->target);
4079 if (IS_NODE_TYPE_SIMPLE(qtype))
4080 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4081 }
4082 }
4083 }
4084 break;
4085
4086 case ENCLOSE_CONDITION:
4087#ifdef USE_NAMED_GROUP
4088 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4089 env->num_named > 0 &&
4090 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4091 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4092 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4093 }
4094#endif
[331]4095 if (NENCLOSE(node)->regnum > env->num_mem)
4096 return ONIGERR_INVALID_BACKREF;
[279]4097 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4098 break;
[331]4099
4100 case ENCLOSE_ABSENT:
4101 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4102 break;
[279]4103 }
4104 }
4105 break;
4106
4107 case NT_ANCHOR:
4108 {
4109 AnchorNode* an = NANCHOR(node);
4110
4111 switch (an->type) {
4112 case ANCHOR_PREC_READ:
4113 r = setup_tree(an->target, reg, state, env);
4114 break;
4115 case ANCHOR_PREC_READ_NOT:
4116 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4117 break;
4118
4119/* allowed node types in look-behind */
4120#define ALLOWED_TYPE_IN_LB \
4121 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4122 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4123
4124#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4125#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4126
4127#define ALLOWED_ANCHOR_IN_LB \
4128( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4129 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4130 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4131 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4132#define ALLOWED_ANCHOR_IN_LB_NOT \
4133( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4134 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4135 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4136 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4137
4138 case ANCHOR_LOOK_BEHIND:
4139 {
4140 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4141 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4142 if (r < 0) return r;
4143 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4144 if (NTYPE(node) != NT_ANCHOR) goto restart;
4145 r = setup_tree(an->target, reg, state, env);
[331]4146 if (r != 0) return r;
4147 r = setup_look_behind(node, reg, env);
[279]4148 }
4149 break;
4150
4151 case ANCHOR_LOOK_BEHIND_NOT:
4152 {
4153 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4154 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4155 if (r < 0) return r;
4156 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4157 if (NTYPE(node) != NT_ANCHOR) goto restart;
4158 r = setup_tree(an->target, reg, (state | IN_NOT), env);
[331]4159 if (r != 0) return r;
4160 r = setup_look_behind(node, reg, env);
[279]4161 }
4162 break;
4163 }
4164 }
4165 break;
4166
4167 default:
4168 break;
4169 }
4170
4171 return r;
4172}
4173
4174#ifndef USE_SUNDAY_QUICK_SEARCH
4175/* set skip map for Boyer-Moore search */
4176static int
4177set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4178 UChar skip[], int** int_skip, int ignore_case)
4179{
4180 OnigDistance i, len;
4181 int clen, flen, n, j, k;
4182 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4183 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4184 OnigEncoding enc = reg->enc;
4185
4186 len = end - s;
4187 if (len < ONIG_CHAR_TABLE_SIZE) {
4188 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4189
4190 n = 0;
4191 for (i = 0; i < len - 1; i += clen) {
4192 p = s + i;
4193 if (ignore_case)
4194 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4195 p, end, items);
[331]4196 clen = enclen(enc, p, end);
4197 if (p + clen > end)
4198 clen = (int )(end - p);
[279]4199
4200 for (j = 0; j < n; j++) {
4201 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4202 return 1; /* different length isn't supported. */
4203 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4204 if (flen != clen)
4205 return 1; /* different length isn't supported. */
4206 }
4207 for (j = 0; j < clen; j++) {
4208 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4209 for (k = 0; k < n; k++) {
4210 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4211 }
4212 }
4213 }
4214 }
4215 else {
[331]4216# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4217 /* This should not happen. */
4218 return ONIGERR_TYPE_BUG;
4219# else
[279]4220 if (IS_NULL(*int_skip)) {
4221 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4222 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4223 }
4224 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4225
4226 n = 0;
4227 for (i = 0; i < len - 1; i += clen) {
4228 p = s + i;
4229 if (ignore_case)
4230 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4231 p, end, items);
[331]4232 clen = enclen(enc, p, end);
4233 if (p + clen > end)
4234 clen = (int )(end - p);
[279]4235
4236 for (j = 0; j < n; j++) {
4237 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4238 return 1; /* different length isn't supported. */
4239 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4240 if (flen != clen)
4241 return 1; /* different length isn't supported. */
4242 }
4243 for (j = 0; j < clen; j++) {
4244 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4245 for (k = 0; k < n; k++) {
4246 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4247 }
4248 }
4249 }
[331]4250# endif
[279]4251 }
4252 return 0;
4253}
4254
4255#else /* USE_SUNDAY_QUICK_SEARCH */
4256
4257/* set skip map for Sunday's quick search */
4258static int
4259set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4260 UChar skip[], int** int_skip, int ignore_case)
4261{
4262 OnigDistance i, len;
4263 int clen, flen, n, j, k;
4264 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4265 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4266 OnigEncoding enc = reg->enc;
4267
4268 len = end - s;
4269 if (len < ONIG_CHAR_TABLE_SIZE) {
4270 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4271
4272 n = 0;
4273 for (i = 0; i < len; i += clen) {
4274 p = s + i;
4275 if (ignore_case)
4276 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4277 p, end, items);
[331]4278 clen = enclen(enc, p, end);
4279 if (p + clen > end)
4280 clen = (int )(end - p);
[279]4281
4282 for (j = 0; j < n; j++) {
4283 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4284 return 1; /* different length isn't supported. */
4285 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4286 if (flen != clen)
4287 return 1; /* different length isn't supported. */
4288 }
4289 for (j = 0; j < clen; j++) {
4290 skip[s[i + j]] = (UChar )(len - i - j);
4291 for (k = 0; k < n; k++) {
4292 skip[buf[k][j]] = (UChar )(len - i - j);
4293 }
4294 }
4295 }
4296 }
4297 else {
[331]4298# if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
4299 /* This should not happen. */
4300 return ONIGERR_TYPE_BUG;
4301# else
[279]4302 if (IS_NULL(*int_skip)) {
4303 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4304 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4305 }
4306 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4307
4308 n = 0;
4309 for (i = 0; i < len; i += clen) {
4310 p = s + i;
4311 if (ignore_case)
4312 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4313 p, end, items);
[331]4314 clen = enclen(enc, p, end);
4315 if (p + clen > end)
4316 clen = (int )(end - p);
[279]4317
4318 for (j = 0; j < n; j++) {
4319 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4320 return 1; /* different length isn't supported. */
4321 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4322 if (flen != clen)
4323 return 1; /* different length isn't supported. */
4324 }
4325 for (j = 0; j < clen; j++) {
4326 (*int_skip)[s[i + j]] = (int )(len - i - j);
4327 for (k = 0; k < n; k++) {
4328 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4329 }
4330 }
4331 }
[331]4332# endif
[279]4333 }
4334 return 0;
4335}
4336#endif /* USE_SUNDAY_QUICK_SEARCH */
4337
4338typedef struct {
4339 OnigDistance min; /* min byte length */
4340 OnigDistance max; /* max byte length */
4341} MinMaxLen;
4342
4343typedef struct {
4344 MinMaxLen mmd;
4345 OnigEncoding enc;
4346 OnigOptionType options;
4347 OnigCaseFoldType case_fold_flag;
4348 ScanEnv* scan_env;
4349} OptEnv;
4350
4351typedef struct {
4352 int left_anchor;
4353 int right_anchor;
4354} OptAncInfo;
4355
4356typedef struct {
4357 MinMaxLen mmd; /* info position */
4358 OptAncInfo anc;
4359
4360 int reach_end;
4361 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4362 int len;
4363 UChar s[OPT_EXACT_MAXLEN];
4364} OptExactInfo;
4365
4366typedef struct {
4367 MinMaxLen mmd; /* info position */
4368 OptAncInfo anc;
4369
4370 int value; /* weighted value */
4371 UChar map[ONIG_CHAR_TABLE_SIZE];
4372} OptMapInfo;
4373
4374typedef struct {
4375 MinMaxLen len;
4376
4377 OptAncInfo anc;
4378 OptExactInfo exb; /* boundary */
4379 OptExactInfo exm; /* middle */
4380 OptExactInfo expr; /* prec read (?=...) */
4381
4382 OptMapInfo map; /* boundary */
4383} NodeOptInfo;
4384
4385
4386static int
4387map_position_value(OnigEncoding enc, int i)
4388{
4389 static const short int ByteValTable[] = {
4390 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4391 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4392 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4393 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4394 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4395 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4396 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4397 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4398 };
4399
4400 if (i < numberof(ByteValTable)) {
4401 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4402 return 20;
4403 else
4404 return (int )ByteValTable[i];
4405 }
4406 else
4407 return 4; /* Take it easy. */
4408}
4409
4410static int
4411distance_value(MinMaxLen* mm)
4412{
4413 /* 1000 / (min-max-dist + 1) */
4414 static const short int dist_vals[] = {
4415 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4416 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4417 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4418 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4419 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4420 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4421 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4422 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4423 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4424 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4425 };
4426
4427 OnigDistance d;
4428
4429 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4430
4431 d = mm->max - mm->min;
4432 if (d < numberof(dist_vals))
4433 /* return dist_vals[d] * 16 / (mm->min + 12); */
4434 return (int )dist_vals[d];
4435 else
4436 return 1;
4437}
4438
4439static int
4440comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4441{
4442 if (v2 <= 0) return -1;
4443 if (v1 <= 0) return 1;
4444
4445 v1 *= distance_value(d1);
4446 v2 *= distance_value(d2);
4447
4448 if (v2 > v1) return 1;
4449 if (v2 < v1) return -1;
4450
4451 if (d2->min < d1->min) return 1;
4452 if (d2->min > d1->min) return -1;
4453 return 0;
4454}
4455
4456static int
4457is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4458{
4459 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4460}
4461
4462
4463static void
4464set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4465{
4466 mml->min = min;
4467 mml->max = max;
4468}
4469
4470static void
4471clear_mml(MinMaxLen* mml)
4472{
4473 mml->min = mml->max = 0;
4474}
4475
4476static void
4477copy_mml(MinMaxLen* to, MinMaxLen* from)
4478{
4479 to->min = from->min;
4480 to->max = from->max;
4481}
4482
4483static void
4484add_mml(MinMaxLen* to, MinMaxLen* from)
4485{
4486 to->min = distance_add(to->min, from->min);
4487 to->max = distance_add(to->max, from->max);
4488}
4489
4490#if 0
4491static void
4492add_len_mml(MinMaxLen* to, OnigDistance len)
4493{
4494 to->min = distance_add(to->min, len);
4495 to->max = distance_add(to->max, len);
4496}
4497#endif
4498
4499static void
4500alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4501{
4502 if (to->min > from->min) to->min = from->min;
4503 if (to->max < from->max) to->max = from->max;
4504}
4505
4506static void
4507copy_opt_env(OptEnv* to, OptEnv* from)
4508{
4509 *to = *from;
4510}
4511
4512static void
4513clear_opt_anc_info(OptAncInfo* anc)
4514{
4515 anc->left_anchor = 0;
4516 anc->right_anchor = 0;
4517}
4518
4519static void
4520copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4521{
4522 *to = *from;
4523}
4524
4525static void
4526concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4527 OnigDistance left_len, OnigDistance right_len)
4528{
4529 clear_opt_anc_info(to);
4530
4531 to->left_anchor = left->left_anchor;
4532 if (left_len == 0) {
4533 to->left_anchor |= right->left_anchor;
4534 }
4535
4536 to->right_anchor = right->right_anchor;
4537 if (right_len == 0) {
4538 to->right_anchor |= left->right_anchor;
4539 }
4540 else {
4541 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4542 }
4543}
4544
4545static int
4546is_left_anchor(int anc)
4547{
4548 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4549 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4550 anc == ANCHOR_PREC_READ_NOT)
4551 return 0;
4552
4553 return 1;
4554}
4555
4556static int
4557is_set_opt_anc_info(OptAncInfo* to, int anc)
4558{
4559 if ((to->left_anchor & anc) != 0) return 1;
4560
4561 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4562}
4563
4564static void
4565add_opt_anc_info(OptAncInfo* to, int anc)
4566{
4567 if (is_left_anchor(anc))
4568 to->left_anchor |= anc;
4569 else
4570 to->right_anchor |= anc;
4571}
4572
4573static void
4574remove_opt_anc_info(OptAncInfo* to, int anc)
4575{
4576 if (is_left_anchor(anc))
4577 to->left_anchor &= ~anc;
4578 else
4579 to->right_anchor &= ~anc;
4580}
4581
4582static void
4583alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4584{
4585 to->left_anchor &= add->left_anchor;
4586 to->right_anchor &= add->right_anchor;
4587}
4588
4589static int
4590is_full_opt_exact_info(OptExactInfo* ex)
4591{
4592 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4593}
4594
4595static void
4596clear_opt_exact_info(OptExactInfo* ex)
4597{
4598 clear_mml(&ex->mmd);
4599 clear_opt_anc_info(&ex->anc);
4600 ex->reach_end = 0;
4601 ex->ignore_case = -1; /* unset */
4602 ex->len = 0;
4603 ex->s[0] = '\0';
4604}
4605
4606static void
4607copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4608{
4609 *to = *from;
4610}
4611
4612static void
4613concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4614{
4615 int i, j, len;
4616 UChar *p, *end;
4617 OptAncInfo tanc;
4618
4619 if (to->ignore_case < 0)
4620 to->ignore_case = add->ignore_case;
4621 else if (to->ignore_case != add->ignore_case)
4622 return ; /* avoid */
4623
4624 p = add->s;
4625 end = p + add->len;
4626 for (i = to->len; p < end; ) {
[331]4627 len = enclen(enc, p, end);
[279]4628 if (i + len > OPT_EXACT_MAXLEN) break;
4629 for (j = 0; j < len && p < end; j++)
4630 to->s[i++] = *p++;
4631 }
4632
4633 to->len = i;
4634 to->reach_end = (p == end ? add->reach_end : 0);
4635
4636 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4637 if (! to->reach_end) tanc.right_anchor = 0;
4638 copy_opt_anc_info(&to->anc, &tanc);
4639}
4640
4641static void
4642concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4643 int raw ARG_UNUSED, OnigEncoding enc)
4644{
4645 int i, j, len;
4646 UChar *p;
4647
4648 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
[331]4649 len = enclen(enc, p, end);
[279]4650 if (i + len > OPT_EXACT_MAXLEN) break;
4651 for (j = 0; j < len && p < end; j++)
4652 to->s[i++] = *p++;
4653 }
4654
4655 to->len = i;
4656}
4657
4658static void
4659alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4660{
4661 int i, j, len;
4662
4663 if (add->len == 0 || to->len == 0) {
4664 clear_opt_exact_info(to);
4665 return ;
4666 }
4667
4668 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4669 clear_opt_exact_info(to);
4670 return ;
4671 }
4672
4673 for (i = 0; i < to->len && i < add->len; ) {
4674 if (to->s[i] != add->s[i]) break;
[331]4675 len = enclen(env->enc, to->s + i, to->s + to->len);
[279]4676
4677 for (j = 1; j < len; j++) {
4678 if (to->s[i+j] != add->s[i+j]) break;
4679 }
4680 if (j < len) break;
4681 i += len;
4682 }
4683
4684 if (! add->reach_end || i < add->len || i < to->len) {
4685 to->reach_end = 0;
4686 }
4687 to->len = i;
4688 if (to->ignore_case < 0)
4689 to->ignore_case = add->ignore_case;
4690 else if (add->ignore_case >= 0)
4691 to->ignore_case |= add->ignore_case;
4692
4693 alt_merge_opt_anc_info(&to->anc, &add->anc);
4694 if (! to->reach_end) to->anc.right_anchor = 0;
4695}
4696
4697static void
4698select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4699{
4700 int v1, v2;
4701
4702 v1 = now->len;
4703 v2 = alt->len;
4704
4705 if (v2 == 0) {
4706 return ;
4707 }
4708 else if (v1 == 0) {
4709 copy_opt_exact_info(now, alt);
4710 return ;
4711 }
4712 else if (v1 <= 2 && v2 <= 2) {
4713 /* ByteValTable[x] is big value --> low price */
4714 v2 = map_position_value(enc, now->s[0]);
4715 v1 = map_position_value(enc, alt->s[0]);
4716
4717 if (now->len > 1) v1 += 5;
4718 if (alt->len > 1) v2 += 5;
4719 }
4720
4721 if (now->ignore_case <= 0) v1 *= 2;
4722 if (alt->ignore_case <= 0) v2 *= 2;
4723
4724 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4725 copy_opt_exact_info(now, alt);
4726}
4727
4728static void
4729clear_opt_map_info(OptMapInfo* map)
4730{
4731 static const OptMapInfo clean_info = {
4732 {0, 0}, {0, 0}, 0,
4733 {
4734 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4735 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4736 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4737 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4738 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4739 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4740 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4741 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4742 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4743 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4744 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4745 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4746 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4747 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4748 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4749 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4750 }
4751 };
4752
4753 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4754}
4755
4756static void
4757copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4758{
4759 *to = *from;
4760}
4761
4762static void
4763add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4764{
4765 if (map->map[c] == 0) {
4766 map->map[c] = 1;
4767 map->value += map_position_value(enc, c);
4768 }
4769}
4770
4771static int
4772add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4773 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4774{
4775 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4776 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4777 int i, n;
4778
4779 add_char_opt_map_info(map, p[0], enc);
4780
4781 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4782 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4783 if (n < 0) return n;
4784
4785 for (i = 0; i < n; i++) {
4786 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4787 add_char_opt_map_info(map, buf[0], enc);
4788 }
4789
4790 return 0;
4791}
4792
4793static void
4794select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4795{
4796 const int z = 1<<15; /* 32768: something big value */
4797
4798 int v1, v2;
4799
4800 if (alt->value == 0) return ;
4801 if (now->value == 0) {
4802 copy_opt_map_info(now, alt);
4803 return ;
4804 }
4805
4806 v1 = z / now->value;
4807 v2 = z / alt->value;
4808 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4809 copy_opt_map_info(now, alt);
4810}
4811
4812static int
4813comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4814{
4815#define COMP_EM_BASE 20
4816 int ve, vm;
4817
4818 if (m->value <= 0) return -1;
4819
4820 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4821 vm = COMP_EM_BASE * 5 * 2 / m->value;
4822 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4823}
4824
4825static void
4826alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4827{
4828 int i, val;
4829
4830 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4831 if (to->value == 0) return ;
4832 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4833 clear_opt_map_info(to);
4834 return ;
4835 }
4836
4837 alt_merge_mml(&to->mmd, &add->mmd);
4838
4839 val = 0;
4840 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4841 if (add->map[i])
4842 to->map[i] = 1;
4843
4844 if (to->map[i])
4845 val += map_position_value(enc, i);
4846 }
4847 to->value = val;
4848
4849 alt_merge_opt_anc_info(&to->anc, &add->anc);
4850}
4851
4852static void
4853set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4854{
4855 copy_mml(&(opt->exb.mmd), mmd);
4856 copy_mml(&(opt->expr.mmd), mmd);
4857 copy_mml(&(opt->map.mmd), mmd);
4858}
4859
4860static void
4861clear_node_opt_info(NodeOptInfo* opt)
4862{
4863 clear_mml(&opt->len);
4864 clear_opt_anc_info(&opt->anc);
4865 clear_opt_exact_info(&opt->exb);
4866 clear_opt_exact_info(&opt->exm);
4867 clear_opt_exact_info(&opt->expr);
4868 clear_opt_map_info(&opt->map);
4869}
4870
4871static void
4872copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4873{
4874 *to = *from;
4875}
4876
4877static void
4878concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4879{
4880 int exb_reach, exm_reach;
4881 OptAncInfo tanc;
4882
4883 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4884 copy_opt_anc_info(&to->anc, &tanc);
4885
4886 if (add->exb.len > 0 && to->len.max == 0) {
4887 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4888 to->len.max, add->len.max);
4889 copy_opt_anc_info(&add->exb.anc, &tanc);
4890 }
4891
4892 if (add->map.value > 0 && to->len.max == 0) {
4893 if (add->map.mmd.max == 0)
4894 add->map.anc.left_anchor |= to->anc.left_anchor;
4895 }
4896
4897 exb_reach = to->exb.reach_end;
4898 exm_reach = to->exm.reach_end;
4899
4900 if (add->len.max != 0)
4901 to->exb.reach_end = to->exm.reach_end = 0;
4902
4903 if (add->exb.len > 0) {
4904 if (exb_reach) {
4905 concat_opt_exact_info(&to->exb, &add->exb, enc);
4906 clear_opt_exact_info(&add->exb);
4907 }
4908 else if (exm_reach) {
4909 concat_opt_exact_info(&to->exm, &add->exb, enc);
4910 clear_opt_exact_info(&add->exb);
4911 }
4912 }
4913 select_opt_exact_info(enc, &to->exm, &add->exb);
4914 select_opt_exact_info(enc, &to->exm, &add->exm);
4915
4916 if (to->expr.len > 0) {
4917 if (add->len.max > 0) {
4918 if (to->expr.len > (int )add->len.max)
4919 to->expr.len = (int )add->len.max;
4920
4921 if (to->expr.mmd.max == 0)
4922 select_opt_exact_info(enc, &to->exb, &to->expr);
4923 else
4924 select_opt_exact_info(enc, &to->exm, &to->expr);
4925 }
4926 }
4927 else if (add->expr.len > 0) {
4928 copy_opt_exact_info(&to->expr, &add->expr);
4929 }
4930
4931 select_opt_map_info(&to->map, &add->map);
4932
4933 add_mml(&to->len, &add->len);
4934}
4935
4936static void
4937alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4938{
4939 alt_merge_opt_anc_info (&to->anc, &add->anc);
4940 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4941 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4942 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4943 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4944
4945 alt_merge_mml(&to->len, &add->len);
4946}
4947
4948
4949#define MAX_NODE_OPT_INFO_REF_COUNT 5
4950
4951static int
4952optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4953{
4954 int type;
4955 int r = 0;
4956
4957 clear_node_opt_info(opt);
4958 set_bound_node_opt_info(opt, &env->mmd);
4959
4960 type = NTYPE(node);
4961 switch (type) {
4962 case NT_LIST:
4963 {
4964 OptEnv nenv;
4965 NodeOptInfo nopt;
4966 Node* nd = node;
4967
4968 copy_opt_env(&nenv, env);
4969 do {
4970 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4971 if (r == 0) {
4972 add_mml(&nenv.mmd, &nopt.len);
4973 concat_left_node_opt_info(env->enc, opt, &nopt);
4974 }
4975 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4976 }
4977 break;
4978
4979 case NT_ALT:
4980 {
4981 NodeOptInfo nopt;
4982 Node* nd = node;
4983
4984 do {
4985 r = optimize_node_left(NCAR(nd), &nopt, env);
4986 if (r == 0) {
4987 if (nd == node) copy_node_opt_info(opt, &nopt);
4988 else alt_merge_node_opt_info(opt, &nopt, env);
4989 }
4990 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4991 }
4992 break;
4993
4994 case NT_STR:
4995 {
4996 StrNode* sn = NSTR(node);
4997 OnigDistance slen = sn->end - sn->s;
4998 int is_raw = NSTRING_IS_RAW(node);
4999
5000 if (! NSTRING_IS_AMBIG(node)) {
5001 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5002 is_raw, env->enc);
5003 opt->exb.ignore_case = 0;
5004 if (slen > 0) {
5005 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
5006 }
[331]5007 set_mml(&opt->len, slen, slen);
[279]5008 }
5009 else {
[331]5010 OnigDistance max;
[279]5011
5012 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
[331]5013 int n = onigenc_strlen(env->enc, sn->s, sn->end);
5014 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
[279]5015 }
5016 else {
5017 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
5018 is_raw, env->enc);
5019 opt->exb.ignore_case = 1;
5020
5021 if (slen > 0) {
5022 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5023 env->enc, env->case_fold_flag);
5024 if (r != 0) break;
5025 }
5026
5027 max = slen;
5028 }
5029
[331]5030 set_mml(&opt->len, slen, max);
[279]5031 }
5032
5033 if ((OnigDistance )opt->exb.len == slen)
5034 opt->exb.reach_end = 1;
5035 }
5036 break;
5037
5038 case NT_CCLASS:
5039 {
5040 int i, z;
5041 CClassNode* cc = NCCLASS(node);
5042
5043 /* no need to check ignore case. (set in setup_tree()) */
5044
5045 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
[331]5046 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
[279]5047 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5048
5049 set_mml(&opt->len, min, max);
5050 }
5051 else {
[331]5052 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5053 z = BITSET_AT(cc->bs, i);
5054 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5055 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5056 }
5057 }
[279]5058 set_mml(&opt->len, 1, 1);
5059 }
5060 }
5061 break;
5062
5063 case NT_CTYPE:
5064 {
5065 int i, min, max;
5066 int maxcode;
5067
5068 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5069
5070 if (max == 1) {
[331]5071 min = 1;
[279]5072
5073 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5074 switch (NCTYPE(node)->ctype) {
5075 case ONIGENC_CTYPE_WORD:
5076 if (NCTYPE(node)->not != 0) {
5077 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5078 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5079 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5080 }
5081 }
5082 }
5083 else {
5084 for (i = 0; i < maxcode; i++) {
5085 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5086 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5087 }
5088 }
5089 }
5090 break;
5091 }
5092 }
5093 else {
[331]5094 min = ONIGENC_MBC_MINLEN(env->enc);
[279]5095 }
5096 set_mml(&opt->len, min, max);
5097 }
5098 break;
5099
5100 case NT_CANY:
5101 {
5102 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5103 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5104 set_mml(&opt->len, min, max);
5105 }
5106 break;
5107
5108 case NT_ANCHOR:
5109 switch (NANCHOR(node)->type) {
5110 case ANCHOR_BEGIN_BUF:
5111 case ANCHOR_BEGIN_POSITION:
5112 case ANCHOR_BEGIN_LINE:
5113 case ANCHOR_END_BUF:
5114 case ANCHOR_SEMI_END_BUF:
5115 case ANCHOR_END_LINE:
5116 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5117 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5118 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5119 break;
5120
5121 case ANCHOR_PREC_READ:
5122 {
5123 NodeOptInfo nopt;
5124
5125 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5126 if (r == 0) {
5127 if (nopt.exb.len > 0)
5128 copy_opt_exact_info(&opt->expr, &nopt.exb);
5129 else if (nopt.exm.len > 0)
5130 copy_opt_exact_info(&opt->expr, &nopt.exm);
5131
5132 opt->expr.reach_end = 0;
5133
5134 if (nopt.map.value > 0)
5135 copy_opt_map_info(&opt->map, &nopt.map);
5136 }
5137 }
5138 break;
5139
5140 case ANCHOR_LOOK_BEHIND_NOT:
5141 break;
5142 }
5143 break;
5144
5145 case NT_BREF:
5146 {
5147 int i;
5148 int* backs;
5149 OnigDistance min, max, tmin, tmax;
5150 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5151 BRefNode* br = NBREF(node);
5152
5153 if (br->state & NST_RECURSION) {
5154 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5155 break;
5156 }
5157 backs = BACKREFS_P(br);
5158 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5159 if (r != 0) break;
5160 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5161 if (r != 0) break;
5162 for (i = 1; i < br->back_num; i++) {
5163 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5164 if (r != 0) break;
5165 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5166 if (r != 0) break;
5167 if (min > tmin) min = tmin;
5168 if (max < tmax) max = tmax;
5169 }
5170 if (r == 0) set_mml(&opt->len, min, max);
5171 }
5172 break;
5173
5174#ifdef USE_SUBEXP_CALL
5175 case NT_CALL:
5176 if (IS_CALL_RECURSION(NCALL(node)))
5177 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5178 else {
5179 OnigOptionType save = env->options;
5180 env->options = NENCLOSE(NCALL(node)->target)->option;
5181 r = optimize_node_left(NCALL(node)->target, opt, env);
5182 env->options = save;
5183 }
5184 break;
5185#endif
5186
5187 case NT_QTFR:
5188 {
5189 int i;
5190 OnigDistance min, max;
5191 NodeOptInfo nopt;
5192 QtfrNode* qn = NQTFR(node);
5193
5194 r = optimize_node_left(qn->target, &nopt, env);
5195 if (r) break;
5196
5197 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5198 if (env->mmd.max == 0 &&
5199 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5200 if (IS_MULTILINE(env->options))
5201 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5202 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5203 else
5204 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5205 }
5206 }
5207 else {
5208 if (qn->lower > 0) {
5209 copy_node_opt_info(opt, &nopt);
5210 if (nopt.exb.len > 0) {
5211 if (nopt.exb.reach_end) {
5212 for (i = 2; i <= qn->lower &&
[331]5213 ! is_full_opt_exact_info(&opt->exb); i++) {
[279]5214 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5215 }
5216 if (i < qn->lower) {
5217 opt->exb.reach_end = 0;
5218 }
5219 }
5220 }
5221
5222 if (qn->lower != qn->upper) {
5223 opt->exb.reach_end = 0;
5224 opt->exm.reach_end = 0;
5225 }
5226 if (qn->lower > 1)
5227 opt->exm.reach_end = 0;
5228 }
5229 }
5230
5231 min = distance_multiply(nopt.len.min, qn->lower);
5232 if (IS_REPEAT_INFINITE(qn->upper))
5233 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5234 else
5235 max = distance_multiply(nopt.len.max, qn->upper);
5236
5237 set_mml(&opt->len, min, max);
5238 }
5239 break;
5240
5241 case NT_ENCLOSE:
5242 {
5243 EncloseNode* en = NENCLOSE(node);
5244
5245 switch (en->type) {
5246 case ENCLOSE_OPTION:
5247 {
5248 OnigOptionType save = env->options;
5249
5250 env->options = en->option;
5251 r = optimize_node_left(en->target, opt, env);
5252 env->options = save;
5253 }
5254 break;
5255
5256 case ENCLOSE_MEMORY:
5257#ifdef USE_SUBEXP_CALL
5258 en->opt_count++;
5259 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5260 OnigDistance min, max;
5261
5262 min = 0;
5263 max = ONIG_INFINITE_DISTANCE;
5264 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5265 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5266 set_mml(&opt->len, min, max);
5267 }
5268 else
5269#endif
5270 {
5271 r = optimize_node_left(en->target, opt, env);
5272
5273 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5274 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5275 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5276 }
5277 }
5278 break;
5279
5280 case ENCLOSE_STOP_BACKTRACK:
5281 case ENCLOSE_CONDITION:
5282 r = optimize_node_left(en->target, opt, env);
5283 break;
[331]5284
5285 case ENCLOSE_ABSENT:
5286 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5287 break;
[279]5288 }
5289 }
5290 break;
5291
5292 default:
5293#ifdef ONIG_DEBUG
5294 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5295 NTYPE(node));
5296#endif
5297 r = ONIGERR_TYPE_BUG;
5298 break;
5299 }
5300
5301 return r;
5302}
5303
5304static int
5305set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5306{
5307 int r;
5308 int allow_reverse;
5309
5310 if (e->len == 0) return 0;
5311
5312 reg->exact = (UChar* )xmalloc(e->len);
5313 CHECK_NULL_RETURN_MEMERR(reg->exact);
5314 xmemcpy(reg->exact, e->s, e->len);
5315 reg->exact_end = reg->exact + e->len;
5316
5317 allow_reverse =
5318 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5319
5320 if (e->ignore_case > 0) {
5321 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5322 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5323 reg->map, &(reg->int_map), 1);
5324 if (r == 0) {
5325 reg->optimize = (allow_reverse != 0
5326 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5327 }
5328 else {
5329 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5330 }
5331 }
5332 else {
5333 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5334 }
5335 }
5336 else {
5337 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5338 r = set_bm_skip(reg->exact, reg->exact_end, reg,
[331]5339 reg->map, &(reg->int_map), 0);
5340 if (r == 0) {
5341 reg->optimize = (allow_reverse != 0
[279]5342 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
[331]5343 }
5344 else {
5345 reg->optimize = ONIG_OPTIMIZE_EXACT;
5346 }
[279]5347 }
5348 else {
5349 reg->optimize = ONIG_OPTIMIZE_EXACT;
5350 }
5351 }
5352
5353 reg->dmin = e->mmd.min;
5354 reg->dmax = e->mmd.max;
5355
5356 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5357 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5358 }
5359
5360 return 0;
5361}
5362
5363static void
5364set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5365{
5366 int i;
5367
5368 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5369 reg->map[i] = m->map[i];
5370
5371 reg->optimize = ONIG_OPTIMIZE_MAP;
5372 reg->dmin = m->mmd.min;
5373 reg->dmax = m->mmd.max;
5374
5375 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5376 reg->threshold_len = (int )(reg->dmin + 1);
5377 }
5378}
5379
5380static void
5381set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5382{
5383 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5384 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5385}
5386
5387#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5388static void print_optimize_info(FILE* f, regex_t* reg);
5389#endif
5390
5391static int
5392set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5393{
5394
5395 int r;
5396 NodeOptInfo opt;
5397 OptEnv env;
5398
5399 env.enc = reg->enc;
5400 env.options = reg->options;
5401 env.case_fold_flag = reg->case_fold_flag;
5402 env.scan_env = scan_env;
5403 clear_mml(&env.mmd);
5404
5405 r = optimize_node_left(node, &opt, &env);
5406 if (r) return r;
5407
5408 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5409 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5410 ANCHOR_LOOK_BEHIND);
5411
[331]5412 if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
5413 reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
5414
[279]5415 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5416 ANCHOR_PREC_READ_NOT);
5417
5418 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5419 reg->anchor_dmin = opt.len.min;
5420 reg->anchor_dmax = opt.len.max;
5421 }
5422
5423 if (opt.exb.len > 0 || opt.exm.len > 0) {
5424 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5425 if (opt.map.value > 0 &&
5426 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5427 goto set_map;
5428 }
5429 else {
5430 r = set_optimize_exact_info(reg, &opt.exb);
5431 set_sub_anchor(reg, &opt.exb.anc);
5432 }
5433 }
5434 else if (opt.map.value > 0) {
5435 set_map:
5436 set_optimize_map_info(reg, &opt.map);
5437 set_sub_anchor(reg, &opt.map.anc);
5438 }
5439 else {
5440 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5441 if (opt.len.max == 0)
5442 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5443 }
5444
5445#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5446 print_optimize_info(stderr, reg);
5447#endif
5448 return r;
5449}
5450
5451static void
5452clear_optimize_info(regex_t* reg)
5453{
5454 reg->optimize = ONIG_OPTIMIZE_NONE;
5455 reg->anchor = 0;
5456 reg->anchor_dmin = 0;
5457 reg->anchor_dmax = 0;
5458 reg->sub_anchor = 0;
5459 reg->exact_end = (UChar* )NULL;
5460 reg->threshold_len = 0;
5461 if (IS_NOT_NULL(reg->exact)) {
5462 xfree(reg->exact);
5463 reg->exact = (UChar* )NULL;
5464 }
5465}
5466
5467#ifdef ONIG_DEBUG
5468
5469static void print_enc_string(FILE* fp, OnigEncoding enc,
5470 const UChar *s, const UChar *end)
5471{
5472 fprintf(fp, "\nPATTERN: /");
5473
5474 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5475 const UChar *p;
5476 OnigCodePoint code;
5477
5478 p = s;
5479 while (p < end) {
5480 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5481 if (code >= 0x80) {
5482 fprintf(fp, " 0x%04x ", (int )code);
5483 }
5484 else {
5485 fputc((int )code, fp);
5486 }
5487
[331]5488 p += enclen(enc, p, end);
[279]5489 }
5490 }
5491 else {
5492 while (s < end) {
5493 fputc((int )*s, fp);
5494 s++;
5495 }
5496 }
5497
5498 fprintf(fp, "/ (%s)\n", enc->name);
5499}
5500#endif /* ONIG_DEBUG */
5501
5502#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5503static void
5504print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5505{
5506 if (a == ONIG_INFINITE_DISTANCE)
5507 fputs("inf", f);
5508 else
5509 fprintf(f, "(%"PRIuPTR")", a);
5510
5511 fputs("-", f);
5512
5513 if (b == ONIG_INFINITE_DISTANCE)
5514 fputs("inf", f);
5515 else
5516 fprintf(f, "(%"PRIuPTR")", b);
5517}
5518
5519static void
5520print_anchor(FILE* f, int anchor)
5521{
5522 int q = 0;
5523
5524 fprintf(f, "[");
5525
5526 if (anchor & ANCHOR_BEGIN_BUF) {
5527 fprintf(f, "begin-buf");
5528 q = 1;
5529 }
5530 if (anchor & ANCHOR_BEGIN_LINE) {
5531 if (q) fprintf(f, ", ");
5532 q = 1;
5533 fprintf(f, "begin-line");
5534 }
5535 if (anchor & ANCHOR_BEGIN_POSITION) {
5536 if (q) fprintf(f, ", ");
5537 q = 1;
5538 fprintf(f, "begin-pos");
5539 }
5540 if (anchor & ANCHOR_END_BUF) {
5541 if (q) fprintf(f, ", ");
5542 q = 1;
5543 fprintf(f, "end-buf");
5544 }
5545 if (anchor & ANCHOR_SEMI_END_BUF) {
5546 if (q) fprintf(f, ", ");
5547 q = 1;
5548 fprintf(f, "semi-end-buf");
5549 }
5550 if (anchor & ANCHOR_END_LINE) {
5551 if (q) fprintf(f, ", ");
5552 q = 1;
5553 fprintf(f, "end-line");
5554 }
5555 if (anchor & ANCHOR_ANYCHAR_STAR) {
5556 if (q) fprintf(f, ", ");
5557 q = 1;
5558 fprintf(f, "anychar-star");
5559 }
5560 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5561 if (q) fprintf(f, ", ");
5562 fprintf(f, "anychar-star-ml");
5563 }
5564
5565 fprintf(f, "]");
5566}
5567
5568static void
5569print_optimize_info(FILE* f, regex_t* reg)
5570{
5571 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5572 "EXACT_IC", "MAP",
5573 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5574
5575 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5576 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5577 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5578 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5579 fprintf(f, "\n");
5580
5581 if (reg->optimize) {
5582 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5583 fprintf(f, "\n");
5584 }
5585 fprintf(f, "\n");
5586
5587 if (reg->exact) {
5588 UChar *p;
5589 fprintf(f, "exact: [");
5590 for (p = reg->exact; p < reg->exact_end; p++) {
5591 fputc(*p, f);
5592 }
5593 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5594 }
5595 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5596 int c, i, n = 0;
5597
5598 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5599 if (reg->map[i]) n++;
5600
5601 fprintf(f, "map: n=%d\n", n);
5602 if (n > 0) {
5603 c = 0;
5604 fputc('[', f);
5605 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5606 if (reg->map[i] != 0) {
[331]5607 if (c > 0) fputs(", ", f);
5608 c++;
5609 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5610 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5611 fputc(i, f);
5612 else
5613 fprintf(f, "%d", i);
5614 }
[279]5615 }
5616 fprintf(f, "]\n");
5617 }
5618 }
5619}
5620#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5621
5622
5623extern void
5624onig_free_body(regex_t* reg)
5625{
5626 if (IS_NOT_NULL(reg)) {
5627 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5628 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5629 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5630 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5631 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5632 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5633
5634#ifdef USE_NAMED_GROUP
5635 onig_names_free(reg);
5636#endif
5637 }
5638}
5639
5640extern void
5641onig_free(regex_t* reg)
5642{
5643 if (IS_NOT_NULL(reg)) {
5644 onig_free_body(reg);
5645 xfree(reg);
5646 }
5647}
5648
[331]5649#ifdef RUBY
[279]5650size_t
5651onig_memsize(const regex_t *reg)
5652{
5653 size_t size = sizeof(regex_t);
5654 if (IS_NULL(reg)) return 0;
5655 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5656 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5657 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5658 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5659 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5660 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5661
5662 return size;
5663}
5664
5665size_t
5666onig_region_memsize(const OnigRegion *regs)
5667{
5668 size_t size = sizeof(*regs);
5669 if (IS_NULL(regs)) return 0;
5670 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5671 return size;
5672}
[331]5673#endif
[279]5674
5675#define REGEX_TRANSFER(to,from) do {\
5676 onig_free_body(to);\
5677 xmemcpy(to, from, sizeof(regex_t));\
5678 xfree(from);\
5679} while (0)
5680
[331]5681#if 0
[279]5682extern void
5683onig_transfer(regex_t* to, regex_t* from)
5684{
5685 REGEX_TRANSFER(to, from);
5686}
[331]5687#endif
[279]5688
5689#ifdef ONIG_DEBUG_COMPILE
[331]5690static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
[279]5691#endif
5692#ifdef ONIG_DEBUG_PARSE_TREE
[331]5693static void print_tree(FILE* f, Node* node);
[279]5694#endif
5695
[331]5696#ifdef RUBY
[279]5697extern int
5698onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
[331]5699 OnigErrorInfo* einfo)
[279]5700{
[331]5701 return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
5702}
5703#endif
5704
5705#ifdef RUBY
5706extern int
5707onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5708 OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
5709#else
5710extern int
5711onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5712 OnigErrorInfo* einfo)
5713#endif
5714{
[279]5715#define COMPILE_INIT_SIZE 20
5716
5717 int r;
5718 OnigDistance init_size;
5719 Node* root;
5720 ScanEnv scan_env = {0};
5721#ifdef USE_SUBEXP_CALL
5722 UnsetAddrList uslist;
5723#endif
5724
5725 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5726
[331]5727#ifdef RUBY
5728 scan_env.sourcefile = sourcefile;
5729 scan_env.sourceline = sourceline;
5730#endif
[279]5731
5732#ifdef ONIG_DEBUG
5733 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5734#endif
5735
5736 if (reg->alloc == 0) {
5737 init_size = (pattern_end - pattern) * 2;
5738 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5739 r = BBUF_INIT(reg, init_size);
5740 if (r != 0) goto end;
5741 }
5742 else
5743 reg->used = 0;
5744
5745 reg->num_mem = 0;
5746 reg->num_repeat = 0;
5747 reg->num_null_check = 0;
5748 reg->repeat_range_alloc = 0;
5749 reg->repeat_range = (OnigRepeatRange* )NULL;
5750#ifdef USE_COMBINATION_EXPLOSION_CHECK
5751 reg->num_comb_exp_check = 0;
5752#endif
5753
5754 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5755 if (r != 0) goto err;
5756
[331]5757#ifdef ONIG_DEBUG_PARSE_TREE
5758# if 0
5759 fprintf(stderr, "ORIGINAL PARSE TREE:\n");
5760 print_tree(stderr, root);
5761# endif
5762#endif
5763
[279]5764#ifdef USE_NAMED_GROUP
5765 /* mixed use named group and no-named group */
5766 if (scan_env.num_named > 0 &&
5767 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5768 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5769 if (scan_env.num_named != scan_env.num_mem)
5770 r = disable_noname_group_capture(&root, reg, &scan_env);
5771 else
5772 r = numbered_ref_check(root);
5773
5774 if (r != 0) goto err;
5775 }
5776#endif
5777
5778#ifdef USE_SUBEXP_CALL
5779 if (scan_env.num_call > 0) {
5780 r = unset_addr_list_init(&uslist, scan_env.num_call);
5781 if (r != 0) goto err;
5782 scan_env.unset_addr_list = &uslist;
5783 r = setup_subexp_call(root, &scan_env);
5784 if (r != 0) goto err_unset;
5785 r = subexp_recursive_check_trav(root, &scan_env);
5786 if (r < 0) goto err_unset;
5787 r = subexp_inf_recursive_check_trav(root, &scan_env);
5788 if (r != 0) goto err_unset;
5789
5790 reg->num_call = scan_env.num_call;
5791 }
5792 else
5793 reg->num_call = 0;
5794#endif
5795
[331]5796 r = setup_tree(root, reg, 0, &scan_env);
[279]5797 if (r != 0) goto err_unset;
5798
5799#ifdef ONIG_DEBUG_PARSE_TREE
5800 print_tree(stderr, root);
5801#endif
5802
5803 reg->capture_history = scan_env.capture_history;
5804 reg->bt_mem_start = scan_env.bt_mem_start;
5805 reg->bt_mem_start |= reg->capture_history;
5806 if (IS_FIND_CONDITION(reg->options))
5807 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5808 else {
5809 reg->bt_mem_end = scan_env.bt_mem_end;
5810 reg->bt_mem_end |= reg->capture_history;
5811 }
5812
5813#ifdef USE_COMBINATION_EXPLOSION_CHECK
5814 if (scan_env.backrefed_mem == 0
[331]5815# ifdef USE_SUBEXP_CALL
[279]5816 || scan_env.num_call == 0
[331]5817# endif
[279]5818 ) {
5819 setup_comb_exp_check(root, 0, &scan_env);
[331]5820# ifdef USE_SUBEXP_CALL
[279]5821 if (scan_env.has_recursion != 0) {
5822 scan_env.num_comb_exp_check = 0;
5823 }
5824 else
[331]5825# endif
[279]5826 if (scan_env.comb_exp_max_regnum > 0) {
5827 int i;
5828 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5829 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5830 scan_env.num_comb_exp_check = 0;
5831 break;
5832 }
5833 }
5834 }
5835 }
5836
5837 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5838#endif
5839
5840 clear_optimize_info(reg);
5841#ifndef ONIG_DONT_OPTIMIZE
5842 r = set_optimize_info_from_tree(root, reg, &scan_env);
5843 if (r != 0) goto err_unset;
5844#endif
5845
5846 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5847 xfree(scan_env.mem_nodes_dynamic);
5848 scan_env.mem_nodes_dynamic = (Node** )NULL;
5849 }
5850
5851 r = compile_tree(root, reg);
5852 if (r == 0) {
5853 r = add_opcode(reg, OP_END);
5854#ifdef USE_SUBEXP_CALL
5855 if (scan_env.num_call > 0) {
5856 r = unset_addr_list_fix(&uslist, reg);
5857 unset_addr_list_end(&uslist);
5858 if (r) goto err;
5859 }
5860#endif
5861
5862 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5863 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5864 else {
5865 if (reg->bt_mem_start != 0)
5866 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5867 else
5868 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5869 }
5870 }
5871#ifdef USE_SUBEXP_CALL
5872 else if (scan_env.num_call > 0) {
5873 unset_addr_list_end(&uslist);
5874 }
5875#endif
5876 onig_node_free(root);
5877
5878#ifdef ONIG_DEBUG_COMPILE
[331]5879# ifdef USE_NAMED_GROUP
[279]5880 onig_print_names(stderr, reg);
[331]5881# endif
[279]5882 print_compiled_byte_code_list(stderr, reg);
5883#endif
5884
5885 end:
5886 return r;
5887
5888 err_unset:
5889#ifdef USE_SUBEXP_CALL
5890 if (scan_env.num_call > 0) {
5891 unset_addr_list_end(&uslist);
5892 }
5893#endif
5894 err:
5895 if (IS_NOT_NULL(scan_env.error)) {
5896 if (IS_NOT_NULL(einfo)) {
5897 einfo->enc = scan_env.enc;
5898 einfo->par = scan_env.error;
5899 einfo->par_end = scan_env.error_end;
5900 }
5901 }
5902
5903 onig_node_free(root);
5904 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5905 xfree(scan_env.mem_nodes_dynamic);
5906 return r;
5907}
5908
5909static int onig_inited = 0;
5910
5911extern int
5912onig_reg_init(regex_t* reg, OnigOptionType option,
5913 OnigCaseFoldType case_fold_flag,
[331]5914 OnigEncoding enc, const OnigSyntaxType* syntax)
[279]5915{
5916 if (! onig_inited)
5917 onig_init();
5918
5919 if (IS_NULL(reg))
5920 return ONIGERR_INVALID_ARGUMENT;
5921
5922 if (ONIGENC_IS_UNDEF(enc))
5923 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5924
5925 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5926 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5927 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5928 }
5929
5930 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5931 option |= syntax->options;
5932 option &= ~ONIG_OPTION_SINGLELINE;
5933 }
5934 else
5935 option |= syntax->options;
5936
5937 (reg)->enc = enc;
5938 (reg)->options = option;
5939 (reg)->syntax = syntax;
5940 (reg)->optimize = 0;
5941 (reg)->exact = (UChar* )NULL;
5942 (reg)->int_map = (int* )NULL;
5943 (reg)->int_map_backward = (int* )NULL;
5944 (reg)->chain = (regex_t* )NULL;
5945
5946 (reg)->p = (UChar* )NULL;
5947 (reg)->alloc = 0;
5948 (reg)->used = 0;
5949 (reg)->name_table = (void* )NULL;
5950
5951 (reg)->case_fold_flag = case_fold_flag;
5952 return 0;
5953}
5954
5955extern int
5956onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5957 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
[331]5958 const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
[279]5959{
5960 int r;
5961
5962 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5963 if (r) return r;
5964
5965 r = onig_compile(reg, pattern, pattern_end, einfo);
5966 return r;
5967}
5968
5969extern int
5970onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
[331]5971 OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
[279]5972 OnigErrorInfo* einfo)
5973{
5974 int r;
5975
5976 *reg = (regex_t* )xmalloc(sizeof(regex_t));
5977 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5978
5979 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5980 if (r) goto err;
5981
5982 r = onig_compile(*reg, pattern, pattern_end, einfo);
5983 if (r) {
5984 err:
5985 onig_free(*reg);
5986 *reg = NULL;
5987 }
5988 return r;
5989}
5990
[331]5991extern int
5992onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
5993{
5994 return onig_init();
5995}
[279]5996
5997extern int
5998onig_init(void)
5999{
6000 if (onig_inited != 0)
6001 return 0;
6002
6003 onig_inited = 1;
6004
[331]6005#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6006 _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
6007#endif
6008
[279]6009 onigenc_init();
6010 /* onigenc_set_default_caseconv_table((UChar* )0); */
6011
6012#ifdef ONIG_DEBUG_STATISTICS
6013 onig_statistics_init();
6014#endif
6015
6016 return 0;
6017}
6018
6019
6020static OnigEndCallListItemType* EndCallTop;
6021
6022extern void onig_add_end_call(void (*func)(void))
6023{
6024 OnigEndCallListItemType* item;
6025
6026 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6027 if (item == 0) return ;
6028
6029 item->next = EndCallTop;
6030 item->func = func;
6031
6032 EndCallTop = item;
6033}
6034
6035static void
6036exec_end_call_list(void)
6037{
6038 OnigEndCallListItemType* prev;
6039 void (*func)(void);
6040
6041 while (EndCallTop != 0) {
6042 func = EndCallTop->func;
6043 (*func)();
6044
6045 prev = EndCallTop;
6046 EndCallTop = EndCallTop->next;
6047 xfree(prev);
6048 }
6049}
6050
6051extern int
6052onig_end(void)
6053{
6054 exec_end_call_list();
6055
6056#ifdef ONIG_DEBUG_STATISTICS
6057 onig_print_statistics(stderr);
6058#endif
6059
[331]6060#if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
6061 _CrtDumpMemoryLeaks();
[279]6062#endif
6063
6064 onig_inited = 0;
6065
6066 return 0;
6067}
6068
6069extern int
6070onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6071{
6072 OnigCodePoint n, *data;
6073 OnigCodePoint low, high, x;
6074
6075 GET_CODE_POINT(n, p);
6076 data = (OnigCodePoint* )p;
6077 data++;
6078
6079 for (low = 0, high = n; low < high; ) {
6080 x = (low + high) >> 1;
6081 if (code > data[x * 2 + 1])
6082 low = x + 1;
6083 else
6084 high = x;
6085 }
6086
6087 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6088}
6089
6090extern int
6091onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6092{
6093 int found;
6094
6095 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6096 if (IS_NULL(cc->mbuf)) {
6097 found = 0;
6098 }
6099 else {
6100 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6101 }
6102 }
6103 else {
6104 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6105 }
6106
6107 if (IS_NCCLASS_NOT(cc))
6108 return !found;
6109 else
6110 return found;
6111}
6112
6113extern int
6114onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6115{
6116 int len;
6117
6118 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6119 len = 2;
6120 }
6121 else {
6122 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6123 }
6124 return onig_is_code_in_cc_len(len, code, cc);
6125}
6126
6127
6128#ifdef ONIG_DEBUG
6129
6130/* arguments type */
[331]6131# define ARG_SPECIAL -1
6132# define ARG_NON 0
6133# define ARG_RELADDR 1
6134# define ARG_ABSADDR 2
6135# define ARG_LENGTH 3
6136# define ARG_MEMNUM 4
6137# define ARG_OPTION 5
6138# define ARG_STATE_CHECK 6
[279]6139
6140OnigOpInfoType OnigOpInfo[] = {
6141 { OP_FINISH, "finish", ARG_NON },
6142 { OP_END, "end", ARG_NON },
6143 { OP_EXACT1, "exact1", ARG_SPECIAL },
6144 { OP_EXACT2, "exact2", ARG_SPECIAL },
6145 { OP_EXACT3, "exact3", ARG_SPECIAL },
6146 { OP_EXACT4, "exact4", ARG_SPECIAL },
6147 { OP_EXACT5, "exact5", ARG_SPECIAL },
6148 { OP_EXACTN, "exactn", ARG_SPECIAL },
6149 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6150 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6151 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6152 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6153 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6154 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6155 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6156 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6157 { OP_CCLASS, "cclass", ARG_SPECIAL },
6158 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6159 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6160 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6161 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6162 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6163 { OP_ANYCHAR, "anychar", ARG_NON },
6164 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6165 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6166 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6167 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6168 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6169 { OP_WORD, "word", ARG_NON },
6170 { OP_NOT_WORD, "not-word", ARG_NON },
6171 { OP_WORD_BOUND, "word-bound", ARG_NON },
6172 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6173 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6174 { OP_WORD_END, "word-end", ARG_NON },
6175 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6176 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6177 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6178 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6179 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6180 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6181 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6182 { OP_END_BUF, "end-buf", ARG_NON },
6183 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6184 { OP_END_LINE, "end-line", ARG_NON },
6185 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6186 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6187 { OP_BACKREF1, "backref1", ARG_NON },
6188 { OP_BACKREF2, "backref2", ARG_NON },
6189 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6190 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6191 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6192 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6193 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6194 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6195 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6196 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6197 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6198 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6199 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6200 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6201 { OP_SET_OPTION, "set-option", ARG_OPTION },
6202 { OP_KEEP, "keep", ARG_NON },
6203 { OP_FAIL, "fail", ARG_NON },
6204 { OP_JUMP, "jump", ARG_RELADDR },
6205 { OP_PUSH, "push", ARG_RELADDR },
6206 { OP_POP, "pop", ARG_NON },
6207 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6208 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6209 { OP_REPEAT, "repeat", ARG_SPECIAL },
6210 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6211 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6212 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6213 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6214 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6215 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6216 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6217 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6218 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6219 { OP_PUSH_POS, "push-pos", ARG_NON },
6220 { OP_POP_POS, "pop-pos", ARG_NON },
6221 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6222 { OP_FAIL_POS, "fail-pos", ARG_NON },
6223 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6224 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6225 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6226 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6227 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
[331]6228 { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
6229 { OP_ABSENT, "absent", ARG_RELADDR },
6230 { OP_ABSENT_END, "absent-end", ARG_NON },
[279]6231 { OP_CALL, "call", ARG_ABSADDR },
6232 { OP_RETURN, "return", ARG_NON },
6233 { OP_CONDITION, "condition", ARG_SPECIAL },
6234 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6235 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6236 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6237 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6238 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6239 "state-check-anychar-ml*", ARG_STATE_CHECK },
6240 { -1, "", ARG_NON }
6241};
6242
6243static const char*
6244op2name(int opcode)
6245{
6246 int i;
6247
6248 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6249 if (opcode == OnigOpInfo[i].opcode)
6250 return OnigOpInfo[i].name;
6251 }
6252 return "";
6253}
6254
6255static int
6256op2arg_type(int opcode)
6257{
6258 int i;
6259
6260 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6261 if (opcode == OnigOpInfo[i].opcode)
6262 return OnigOpInfo[i].arg_type;
6263 }
6264 return ARG_SPECIAL;
6265}
6266
[331]6267# ifdef ONIG_DEBUG_PARSE_TREE
[279]6268static void
6269Indent(FILE* f, int indent)
6270{
6271 int i;
6272 for (i = 0; i < indent; i++) putc(' ', f);
6273}
[331]6274# endif /* ONIG_DEBUG_PARSE_TREE */
[279]6275
6276static void
6277p_string(FILE* f, ptrdiff_t len, UChar* s)
6278{
6279 fputs(":", f);
6280 while (len-- > 0) { fputc(*s++, f); }
6281}
6282
6283static void
6284p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6285{
6286 int x = len * mb_len;
6287
6288 fprintf(f, ":%d:", len);
6289 while (x-- > 0) { fputc(*s++, f); }
6290}
6291
6292extern void
[331]6293onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
[279]6294 OnigEncoding enc)
6295{
6296 int i, n, arg_type;
6297 RelAddrType addr;
6298 LengthType len;
6299 MemNumType mem;
6300 StateCheckNumType scn;
6301 OnigCodePoint code;
6302 UChar *q;
6303
6304 fprintf(f, "[%s", op2name(*bp));
6305 arg_type = op2arg_type(*bp);
6306 if (arg_type != ARG_SPECIAL) {
6307 bp++;
6308 switch (arg_type) {
6309 case ARG_NON:
6310 break;
6311 case ARG_RELADDR:
6312 GET_RELADDR_INC(addr, bp);
[331]6313 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
[279]6314 break;
6315 case ARG_ABSADDR:
6316 GET_ABSADDR_INC(addr, bp);
6317 fprintf(f, ":(%d)", addr);
6318 break;
6319 case ARG_LENGTH:
6320 GET_LENGTH_INC(len, bp);
6321 fprintf(f, ":%d", len);
6322 break;
6323 case ARG_MEMNUM:
6324 mem = *((MemNumType* )bp);
6325 bp += SIZE_MEMNUM;
6326 fprintf(f, ":%d", mem);
6327 break;
6328 case ARG_OPTION:
6329 {
6330 OnigOptionType option = *((OnigOptionType* )bp);
6331 bp += SIZE_OPTION;
6332 fprintf(f, ":%d", option);
6333 }
6334 break;
6335
6336 case ARG_STATE_CHECK:
6337 scn = *((StateCheckNumType* )bp);
6338 bp += SIZE_STATE_CHECK_NUM;
6339 fprintf(f, ":%d", scn);
6340 break;
6341 }
6342 }
6343 else {
6344 switch (*bp++) {
6345 case OP_EXACT1:
6346 case OP_ANYCHAR_STAR_PEEK_NEXT:
6347 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6348 p_string(f, 1, bp++); break;
6349 case OP_EXACT2:
6350 p_string(f, 2, bp); bp += 2; break;
6351 case OP_EXACT3:
6352 p_string(f, 3, bp); bp += 3; break;
6353 case OP_EXACT4:
6354 p_string(f, 4, bp); bp += 4; break;
6355 case OP_EXACT5:
6356 p_string(f, 5, bp); bp += 5; break;
6357 case OP_EXACTN:
6358 GET_LENGTH_INC(len, bp);
6359 p_len_string(f, len, 1, bp);
6360 bp += len;
6361 break;
6362
6363 case OP_EXACTMB2N1:
6364 p_string(f, 2, bp); bp += 2; break;
6365 case OP_EXACTMB2N2:
6366 p_string(f, 4, bp); bp += 4; break;
6367 case OP_EXACTMB2N3:
6368 p_string(f, 6, bp); bp += 6; break;
6369 case OP_EXACTMB2N:
6370 GET_LENGTH_INC(len, bp);
6371 p_len_string(f, len, 2, bp);
6372 bp += len * 2;
6373 break;
6374 case OP_EXACTMB3N:
6375 GET_LENGTH_INC(len, bp);
6376 p_len_string(f, len, 3, bp);
6377 bp += len * 3;
6378 break;
6379 case OP_EXACTMBN:
6380 {
6381 int mb_len;
6382
6383 GET_LENGTH_INC(mb_len, bp);
6384 GET_LENGTH_INC(len, bp);
6385 fprintf(f, ":%d:%d:", mb_len, len);
6386 n = len * mb_len;
6387 while (n-- > 0) { fputc(*bp++, f); }
6388 }
6389 break;
6390
6391 case OP_EXACT1_IC:
[331]6392 len = enclen(enc, bp, bpend);
[279]6393 p_string(f, len, bp);
6394 bp += len;
6395 break;
6396 case OP_EXACTN_IC:
6397 GET_LENGTH_INC(len, bp);
6398 p_len_string(f, len, 1, bp);
6399 bp += len;
6400 break;
6401
6402 case OP_CCLASS:
6403 n = bitset_on_num((BitSetRef )bp);
6404 bp += SIZE_BITSET;
6405 fprintf(f, ":%d", n);
6406 break;
6407
6408 case OP_CCLASS_NOT:
6409 n = bitset_on_num((BitSetRef )bp);
6410 bp += SIZE_BITSET;
6411 fprintf(f, ":%d", n);
6412 break;
6413
6414 case OP_CCLASS_MB:
6415 case OP_CCLASS_MB_NOT:
6416 GET_LENGTH_INC(len, bp);
6417 q = bp;
[331]6418# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
[279]6419 ALIGNMENT_RIGHT(q);
[331]6420# endif
[279]6421 GET_CODE_POINT(code, q);
6422 bp += len;
6423 fprintf(f, ":%d:%d", (int )code, len);
6424 break;
6425
6426 case OP_CCLASS_MIX:
6427 case OP_CCLASS_MIX_NOT:
6428 n = bitset_on_num((BitSetRef )bp);
6429 bp += SIZE_BITSET;
6430 GET_LENGTH_INC(len, bp);
6431 q = bp;
[331]6432# ifndef PLATFORM_UNALIGNED_WORD_ACCESS
[279]6433 ALIGNMENT_RIGHT(q);
[331]6434# endif
[279]6435 GET_CODE_POINT(code, q);
6436 bp += len;
6437 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6438 break;
6439
6440 case OP_BACKREFN_IC:
6441 mem = *((MemNumType* )bp);
6442 bp += SIZE_MEMNUM;
6443 fprintf(f, ":%d", mem);
6444 break;
6445
6446 case OP_BACKREF_MULTI_IC:
6447 case OP_BACKREF_MULTI:
6448 fputs(" ", f);
6449 GET_LENGTH_INC(len, bp);
6450 for (i = 0; i < len; i++) {
6451 GET_MEMNUM_INC(mem, bp);
6452 if (i > 0) fputs(", ", f);
6453 fprintf(f, "%d", mem);
6454 }
6455 break;
6456
6457 case OP_BACKREF_WITH_LEVEL:
6458 {
6459 OnigOptionType option;
6460 LengthType level;
6461
6462 GET_OPTION_INC(option, bp);
6463 fprintf(f, ":%d", option);
6464 GET_LENGTH_INC(level, bp);
6465 fprintf(f, ":%d", level);
6466
6467 fputs(" ", f);
6468 GET_LENGTH_INC(len, bp);
6469 for (i = 0; i < len; i++) {
6470 GET_MEMNUM_INC(mem, bp);
6471 if (i > 0) fputs(", ", f);
6472 fprintf(f, "%d", mem);
6473 }
6474 }
6475 break;
6476
6477 case OP_REPEAT:
6478 case OP_REPEAT_NG:
6479 {
6480 mem = *((MemNumType* )bp);
6481 bp += SIZE_MEMNUM;
6482 addr = *((RelAddrType* )bp);
6483 bp += SIZE_RELADDR;
6484 fprintf(f, ":%d:%d", mem, addr);
6485 }
6486 break;
6487
6488 case OP_PUSH_OR_JUMP_EXACT1:
6489 case OP_PUSH_IF_PEEK_NEXT:
6490 addr = *((RelAddrType* )bp);
6491 bp += SIZE_RELADDR;
[331]6492 fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
[279]6493 p_string(f, 1, bp);
6494 bp += 1;
6495 break;
6496
6497 case OP_LOOK_BEHIND:
6498 GET_LENGTH_INC(len, bp);
6499 fprintf(f, ":%d", len);
6500 break;
6501
6502 case OP_PUSH_LOOK_BEHIND_NOT:
6503 GET_RELADDR_INC(addr, bp);
6504 GET_LENGTH_INC(len, bp);
[331]6505 fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
[279]6506 break;
6507
6508 case OP_STATE_CHECK_PUSH:
6509 case OP_STATE_CHECK_PUSH_OR_JUMP:
6510 scn = *((StateCheckNumType* )bp);
6511 bp += SIZE_STATE_CHECK_NUM;
6512 addr = *((RelAddrType* )bp);
6513 bp += SIZE_RELADDR;
[331]6514 fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
[279]6515 break;
6516
6517 case OP_CONDITION:
6518 GET_MEMNUM_INC(mem, bp);
6519 GET_RELADDR_INC(addr, bp);
[331]6520 fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
[279]6521 break;
6522
6523 default:
6524 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
[331]6525 bp[-1]);
[279]6526 }
6527 }
6528 fputs("]", f);
6529 if (nextp) *nextp = bp;
6530}
6531
[331]6532# ifdef ONIG_DEBUG_COMPILE
[279]6533static void
6534print_compiled_byte_code_list(FILE* f, regex_t* reg)
6535{
6536 int ncode;
6537 UChar* bp = reg->p;
6538 UChar* end = reg->p + reg->used;
6539
6540 fprintf(f, "code length: %d", reg->used);
6541
6542 ncode = -1;
6543 while (bp < end) {
6544 ncode++;
6545 if (ncode % 5 == 0)
6546 fprintf(f, "\n%ld:", bp - reg->p);
6547 else
6548 fprintf(f, " %ld:", bp - reg->p);
[331]6549 onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
[279]6550 }
6551
6552 fprintf(f, "\n");
6553}
[331]6554# endif /* ONIG_DEBUG_COMPILE */
[279]6555
[331]6556# ifdef ONIG_DEBUG_PARSE_TREE
[279]6557static void
6558print_indent_tree(FILE* f, Node* node, int indent)
6559{
6560 int i, type, container_p = 0;
6561 int add = 3;
6562 UChar* p;
6563
6564 Indent(f, indent);
6565 if (IS_NULL(node)) {
6566 fprintf(f, "ERROR: null node!!!\n");
6567 exit (0);
6568 }
6569
6570 type = NTYPE(node);
6571 switch (type) {
6572 case NT_LIST:
6573 case NT_ALT:
6574 if (NTYPE(node) == NT_LIST)
6575 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6576 else
6577 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6578
6579 print_indent_tree(f, NCAR(node), indent + add);
6580 while (IS_NOT_NULL(node = NCDR(node))) {
6581 if (NTYPE(node) != type) {
6582 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6583 exit(0);
6584 }
6585 print_indent_tree(f, NCAR(node), indent + add);
6586 }
6587 break;
6588
6589 case NT_STR:
6590 fprintf(f, "<string%s:%"PRIxPTR">",
6591 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6592 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6593 if (*p >= 0x20 && *p < 0x7f)
6594 fputc(*p, f);
6595 else {
6596 fprintf(f, " 0x%02x", *p);
6597 }
6598 }
6599 break;
6600
6601 case NT_CCLASS:
6602 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
[331]6603 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
[279]6604 if (NCCLASS(node)->mbuf) {
6605 BBuf* bbuf = NCCLASS(node)->mbuf;
[331]6606 OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
6607 OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
6608 fprintf(f, "%d", *data++);
6609 for (; data < end; data+=2) {
6610 fprintf(f, ",");
6611 fprintf(f, "%04x-%04x", data[0], data[1]);
[279]6612 }
6613 }
6614 break;
6615
6616 case NT_CTYPE:
6617 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6618 switch (NCTYPE(node)->ctype) {
6619 case ONIGENC_CTYPE_WORD:
6620 if (NCTYPE(node)->not != 0)
6621 fputs("not word", f);
6622 else
6623 fputs("word", f);
6624 break;
6625
6626 default:
6627 fprintf(f, "ERROR: undefined ctype.\n");
6628 exit(0);
6629 }
6630 break;
6631
6632 case NT_CANY:
6633 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6634 break;
6635
6636 case NT_ANCHOR:
6637 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6638 switch (NANCHOR(node)->type) {
6639 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6640 case ANCHOR_END_BUF: fputs("end buf", f); break;
6641 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6642 case ANCHOR_END_LINE: fputs("end line", f); break;
6643 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6644 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6645
6646 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6647 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
[331]6648# ifdef USE_WORD_BEGIN_END
[279]6649 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6650 case ANCHOR_WORD_END: fputs("word end", f); break;
[331]6651# endif
[279]6652 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6653 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6654 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6655 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6656 case ANCHOR_KEEP: fputs("keep",f); break;
6657
6658 default:
6659 fprintf(f, "ERROR: undefined anchor type.\n");
6660 break;
6661 }
6662 break;
6663
6664 case NT_BREF:
6665 {
6666 int* p;
6667 BRefNode* br = NBREF(node);
6668 p = BACKREFS_P(br);
6669 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6670 for (i = 0; i < br->back_num; i++) {
6671 if (i > 0) fputs(", ", f);
6672 fprintf(f, "%d", p[i]);
6673 }
6674 }
6675 break;
6676
[331]6677# ifdef USE_SUBEXP_CALL
[279]6678 case NT_CALL:
6679 {
6680 CallNode* cn = NCALL(node);
6681 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6682 p_string(f, cn->name_end - cn->name, cn->name);
6683 }
6684 break;
[331]6685# endif
[279]6686
6687 case NT_QTFR:
6688 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6689 NQTFR(node)->lower, NQTFR(node)->upper,
6690 (NQTFR(node)->greedy ? "" : "?"));
6691 print_indent_tree(f, NQTFR(node)->target, indent + add);
6692 break;
6693
6694 case NT_ENCLOSE:
6695 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6696 switch (NENCLOSE(node)->type) {
6697 case ENCLOSE_OPTION:
6698 fprintf(f, "option:%d", NENCLOSE(node)->option);
6699 break;
6700 case ENCLOSE_MEMORY:
6701 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6702 break;
6703 case ENCLOSE_STOP_BACKTRACK:
6704 fprintf(f, "stop-bt");
6705 break;
6706 case ENCLOSE_CONDITION:
6707 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6708 break;
[331]6709 case ENCLOSE_ABSENT:
6710 fprintf(f, "absent");
6711 break;
[279]6712
6713 default:
6714 break;
6715 }
6716 fprintf(f, "\n");
6717 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6718 break;
6719
6720 default:
6721 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6722 break;
6723 }
6724
6725 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6726 type != NT_ENCLOSE)
6727 fprintf(f, "\n");
6728
6729 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6730
6731 fflush(f);
6732}
6733
6734static void
6735print_tree(FILE* f, Node* node)
6736{
6737 print_indent_tree(f, node, 0);
6738}
[331]6739# endif /* ONIG_DEBUG_PARSE_TREE */
[279]6740#endif /* ONIG_DEBUG */
Note: See TracBrowser for help on using the repository browser.