source: EcnlProtoTool/trunk/onigmo-5.15.0/src/regcomp.c@ 279

Last change on this file since 279 was 279, checked in by coas-nagasima, 7 years ago

ファイルを追加、更新。

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
  • Property svn:mime-type set to text/x-csrc
File size: 155.8 KB
Line 
1/**********************************************************************
2 regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regparse.h"
32
33#if defined(USE_MULTI_THREAD_SYSTEM) \
34 && defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
35#ifdef _WIN32
36CRITICAL_SECTION gOnigMutex;
37#else
38pthread_mutex_t gOnigMutex;
39#endif
40#endif
41
42OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
43
44extern OnigCaseFoldType
45onig_get_default_case_fold_flag(void)
46{
47 return OnigDefaultCaseFoldFlag;
48}
49
50extern int
51onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
52{
53 OnigDefaultCaseFoldFlag = case_fold_flag;
54 return 0;
55}
56
57
58#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
59static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
60#endif
61
62#if 0
63static UChar*
64str_dup(UChar* s, UChar* end)
65{
66 ptrdiff_t len = end - s;
67
68 if (len > 0) {
69 UChar* r = (UChar* )xmalloc(len + 1);
70 CHECK_NULL_RETURN(r);
71 xmemcpy(r, s, len);
72 r[len] = (UChar )0;
73 return r;
74 }
75 else return NULL;
76}
77#endif
78
79static void
80swap_node(Node* a, Node* b)
81{
82 Node c;
83 c = *a; *a = *b; *b = c;
84
85 if (NTYPE(a) == NT_STR) {
86 StrNode* sn = NSTR(a);
87 if (sn->capa == 0) {
88 size_t len = sn->end - sn->s;
89 sn->s = sn->buf;
90 sn->end = sn->s + len;
91 }
92 }
93
94 if (NTYPE(b) == NT_STR) {
95 StrNode* sn = NSTR(b);
96 if (sn->capa == 0) {
97 size_t len = sn->end - sn->s;
98 sn->s = sn->buf;
99 sn->end = sn->s + len;
100 }
101 }
102}
103
104static OnigDistance
105distance_add(OnigDistance d1, OnigDistance d2)
106{
107 if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
108 return ONIG_INFINITE_DISTANCE;
109 else {
110 if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
111 else return ONIG_INFINITE_DISTANCE;
112 }
113}
114
115static OnigDistance
116distance_multiply(OnigDistance d, int m)
117{
118 if (m == 0) return 0;
119
120 if (d < ONIG_INFINITE_DISTANCE / m)
121 return d * m;
122 else
123 return ONIG_INFINITE_DISTANCE;
124}
125
126static int
127bitset_is_empty(BitSetRef bs)
128{
129 int i;
130 for (i = 0; i < BITSET_SIZE; i++) {
131 if (bs[i] != 0) return 0;
132 }
133 return 1;
134}
135
136#ifdef ONIG_DEBUG
137static int
138bitset_on_num(BitSetRef bs)
139{
140 int i, n;
141
142 n = 0;
143 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
144 if (BITSET_AT(bs, i)) n++;
145 }
146 return n;
147}
148#endif
149
150extern int
151onig_bbuf_init(BBuf* buf, OnigDistance size)
152{
153 if (size <= 0) {
154 size = 0;
155 buf->p = NULL;
156 }
157 else {
158 buf->p = (UChar* )xmalloc(size);
159 if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
160 }
161
162 buf->alloc = (unsigned int )size;
163 buf->used = 0;
164 return 0;
165}
166
167
168#ifdef USE_SUBEXP_CALL
169
170static int
171unset_addr_list_init(UnsetAddrList* uslist, int size)
172{
173 UnsetAddr* p;
174
175 p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
176 CHECK_NULL_RETURN_MEMERR(p);
177 uslist->num = 0;
178 uslist->alloc = size;
179 uslist->us = p;
180 return 0;
181}
182
183static void
184unset_addr_list_end(UnsetAddrList* uslist)
185{
186 if (IS_NOT_NULL(uslist->us))
187 xfree(uslist->us);
188}
189
190static int
191unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
192{
193 UnsetAddr* p;
194 int size;
195
196 if (uslist->num >= uslist->alloc) {
197 size = uslist->alloc * 2;
198 p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
199 CHECK_NULL_RETURN_MEMERR(p);
200 uslist->alloc = size;
201 uslist->us = p;
202 }
203
204 uslist->us[uslist->num].offset = offset;
205 uslist->us[uslist->num].target = node;
206 uslist->num++;
207 return 0;
208}
209#endif /* USE_SUBEXP_CALL */
210
211
212static int
213add_opcode(regex_t* reg, int opcode)
214{
215 BBUF_ADD1(reg, opcode);
216 return 0;
217}
218
219#ifdef USE_COMBINATION_EXPLOSION_CHECK
220static int
221add_state_check_num(regex_t* reg, int num)
222{
223 StateCheckNumType n = (StateCheckNumType )num;
224
225 BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
226 return 0;
227}
228#endif
229
230static int
231add_rel_addr(regex_t* reg, int addr)
232{
233 RelAddrType ra = (RelAddrType )addr;
234
235 BBUF_ADD(reg, &ra, SIZE_RELADDR);
236 return 0;
237}
238
239static int
240add_abs_addr(regex_t* reg, int addr)
241{
242 AbsAddrType ra = (AbsAddrType )addr;
243
244 BBUF_ADD(reg, &ra, SIZE_ABSADDR);
245 return 0;
246}
247
248static int
249add_length(regex_t* reg, OnigDistance len)
250{
251 LengthType l = (LengthType )len;
252
253 BBUF_ADD(reg, &l, SIZE_LENGTH);
254 return 0;
255}
256
257static int
258add_mem_num(regex_t* reg, int num)
259{
260 MemNumType n = (MemNumType )num;
261
262 BBUF_ADD(reg, &n, SIZE_MEMNUM);
263 return 0;
264}
265
266static int
267add_pointer(regex_t* reg, void* addr)
268{
269 PointerType ptr = (PointerType )addr;
270
271 BBUF_ADD(reg, &ptr, SIZE_POINTER);
272 return 0;
273}
274
275static int
276add_option(regex_t* reg, OnigOptionType option)
277{
278 BBUF_ADD(reg, &option, SIZE_OPTION);
279 return 0;
280}
281
282static int
283add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
284{
285 int r;
286
287 r = add_opcode(reg, opcode);
288 if (r) return r;
289 r = add_rel_addr(reg, addr);
290 return r;
291}
292
293static int
294add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
295{
296 BBUF_ADD(reg, bytes, len);
297 return 0;
298}
299
300static int
301add_bitset(regex_t* reg, BitSetRef bs)
302{
303 BBUF_ADD(reg, bs, SIZE_BITSET);
304 return 0;
305}
306
307static int
308add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
309{
310 int r;
311
312 r = add_opcode(reg, opcode);
313 if (r) return r;
314 r = add_option(reg, option);
315 return r;
316}
317
318static int compile_length_tree(Node* node, regex_t* reg);
319static int compile_tree(Node* node, regex_t* reg);
320
321
322#define IS_NEED_STR_LEN_OP_EXACT(op) \
323 ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
324 (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
325
326static int
327select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
328{
329 int op;
330 OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
331
332 if (ignore_case) {
333 switch (str_len) {
334 case 1: op = OP_EXACT1_IC; break;
335 default: op = OP_EXACTN_IC; break;
336 }
337 }
338 else {
339 switch (mb_len) {
340 case 1:
341 switch (str_len) {
342 case 1: op = OP_EXACT1; break;
343 case 2: op = OP_EXACT2; break;
344 case 3: op = OP_EXACT3; break;
345 case 4: op = OP_EXACT4; break;
346 case 5: op = OP_EXACT5; break;
347 default: op = OP_EXACTN; break;
348 }
349 break;
350
351 case 2:
352 switch (str_len) {
353 case 1: op = OP_EXACTMB2N1; break;
354 case 2: op = OP_EXACTMB2N2; break;
355 case 3: op = OP_EXACTMB2N3; break;
356 default: op = OP_EXACTMB2N; break;
357 }
358 break;
359
360 case 3:
361 op = OP_EXACTMB3N;
362 break;
363
364 default:
365 op = OP_EXACTMBN;
366 break;
367 }
368 }
369 return op;
370}
371
372static int
373compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
374{
375 int r;
376 int saved_num_null_check = reg->num_null_check;
377
378 if (empty_info != 0) {
379 r = add_opcode(reg, OP_NULL_CHECK_START);
380 if (r) return r;
381 r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
382 if (r) return r;
383 reg->num_null_check++;
384 }
385
386 r = compile_tree(node, reg);
387 if (r) return r;
388
389 if (empty_info != 0) {
390 if (empty_info == NQ_TARGET_IS_EMPTY)
391 r = add_opcode(reg, OP_NULL_CHECK_END);
392 else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
393 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
394 else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
395 r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
396
397 if (r) return r;
398 r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
399 }
400 return r;
401}
402
403#ifdef USE_SUBEXP_CALL
404static int
405compile_call(CallNode* node, regex_t* reg)
406{
407 int r;
408
409 r = add_opcode(reg, OP_CALL);
410 if (r) return r;
411 r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
412 node->target);
413 if (r) return r;
414 r = add_abs_addr(reg, 0 /*dummy addr.*/);
415 return r;
416}
417#endif
418
419static int
420compile_tree_n_times(Node* node, int n, regex_t* reg)
421{
422 int i, r;
423
424 for (i = 0; i < n; i++) {
425 r = compile_tree(node, reg);
426 if (r) return r;
427 }
428 return 0;
429}
430
431static int
432add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
433 regex_t* reg ARG_UNUSED, int ignore_case)
434{
435 int len;
436 int op = select_str_opcode(mb_len, byte_len, ignore_case);
437
438 len = SIZE_OPCODE;
439
440 if (op == OP_EXACTMBN) len += SIZE_LENGTH;
441 if (IS_NEED_STR_LEN_OP_EXACT(op))
442 len += SIZE_LENGTH;
443
444 len += (int )byte_len;
445 return len;
446}
447
448static int
449add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
450 regex_t* reg, int ignore_case)
451{
452 int op = select_str_opcode(mb_len, byte_len, ignore_case);
453 add_opcode(reg, op);
454
455 if (op == OP_EXACTMBN)
456 add_length(reg, mb_len);
457
458 if (IS_NEED_STR_LEN_OP_EXACT(op)) {
459 if (op == OP_EXACTN_IC)
460 add_length(reg, byte_len);
461 else
462 add_length(reg, byte_len / mb_len);
463 }
464
465 add_bytes(reg, s, byte_len);
466 return 0;
467}
468
469
470static int
471compile_length_string_node(Node* node, regex_t* reg)
472{
473 int rlen, r, len, prev_len, blen, ambig;
474 OnigEncoding enc = reg->enc;
475 UChar *p, *prev;
476 StrNode* sn;
477
478 sn = NSTR(node);
479 if (sn->end <= sn->s)
480 return 0;
481
482 ambig = NSTRING_IS_AMBIG(node);
483
484 p = prev = sn->s;
485 prev_len = enclen(enc, p);
486 p += prev_len;
487 blen = prev_len;
488 rlen = 0;
489
490 for (; p < sn->end; ) {
491 len = enclen(enc, p);
492 if (len == prev_len || ambig) {
493 blen += len;
494 }
495 else {
496 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
497 rlen += r;
498 prev = p;
499 blen = len;
500 prev_len = len;
501 }
502 p += len;
503 }
504 r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
505 rlen += r;
506 return rlen;
507}
508
509static int
510compile_length_string_raw_node(StrNode* sn, regex_t* reg)
511{
512 if (sn->end <= sn->s)
513 return 0;
514
515 return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
516}
517
518static int
519compile_string_node(Node* node, regex_t* reg)
520{
521 int r, len, prev_len, blen, ambig;
522 OnigEncoding enc = reg->enc;
523 UChar *p, *prev, *end;
524 StrNode* sn;
525
526 sn = NSTR(node);
527 if (sn->end <= sn->s)
528 return 0;
529
530 end = sn->end;
531 ambig = NSTRING_IS_AMBIG(node);
532
533 p = prev = sn->s;
534 prev_len = enclen(enc, p);
535 p += prev_len;
536 blen = prev_len;
537
538 for (; p < end; ) {
539 len = enclen(enc, p);
540 if (len == prev_len || ambig) {
541 blen += len;
542 }
543 else {
544 r = add_compile_string(prev, prev_len, blen, reg, ambig);
545 if (r) return r;
546
547 prev = p;
548 blen = len;
549 prev_len = len;
550 }
551
552 p += len;
553 }
554 return add_compile_string(prev, prev_len, blen, reg, ambig);
555}
556
557static int
558compile_string_raw_node(StrNode* sn, regex_t* reg)
559{
560 if (sn->end <= sn->s)
561 return 0;
562
563 return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
564}
565
566static int
567add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
568{
569#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
570 add_length(reg, mbuf->used);
571 return add_bytes(reg, mbuf->p, mbuf->used);
572#else
573 int r, pad_size;
574 UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
575
576 GET_ALIGNMENT_PAD_SIZE(p, pad_size);
577 add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
578 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
579
580 r = add_bytes(reg, mbuf->p, mbuf->used);
581
582 /* padding for return value from compile_length_cclass_node() to be fix. */
583 pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
584 if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
585 return r;
586#endif
587}
588
589static int
590compile_length_cclass_node(CClassNode* cc, regex_t* reg)
591{
592 int len;
593
594 if (IS_NCCLASS_SHARE(cc)) {
595 len = SIZE_OPCODE + SIZE_POINTER;
596 return len;
597 }
598
599 if (IS_NULL(cc->mbuf)) {
600 len = SIZE_OPCODE + SIZE_BITSET;
601 }
602 else {
603 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
604 len = SIZE_OPCODE;
605 }
606 else {
607 len = SIZE_OPCODE + SIZE_BITSET;
608 }
609#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
610 len += SIZE_LENGTH + cc->mbuf->used;
611#else
612 len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
613#endif
614 }
615
616 return len;
617}
618
619static int
620compile_cclass_node(CClassNode* cc, regex_t* reg)
621{
622 int r;
623
624 if (IS_NCCLASS_SHARE(cc)) {
625 add_opcode(reg, OP_CCLASS_NODE);
626 r = add_pointer(reg, cc);
627 return r;
628 }
629
630 if (IS_NULL(cc->mbuf)) {
631 if (IS_NCCLASS_NOT(cc))
632 add_opcode(reg, OP_CCLASS_NOT);
633 else
634 add_opcode(reg, OP_CCLASS);
635
636 r = add_bitset(reg, cc->bs);
637 }
638 else {
639 if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
640 if (IS_NCCLASS_NOT(cc))
641 add_opcode(reg, OP_CCLASS_MB_NOT);
642 else
643 add_opcode(reg, OP_CCLASS_MB);
644
645 r = add_multi_byte_cclass(cc->mbuf, reg);
646 }
647 else {
648 if (IS_NCCLASS_NOT(cc))
649 add_opcode(reg, OP_CCLASS_MIX_NOT);
650 else
651 add_opcode(reg, OP_CCLASS_MIX);
652
653 r = add_bitset(reg, cc->bs);
654 if (r) return r;
655 r = add_multi_byte_cclass(cc->mbuf, reg);
656 }
657 }
658
659 return r;
660}
661
662static int
663entry_repeat_range(regex_t* reg, int id, int lower, int upper)
664{
665#define REPEAT_RANGE_ALLOC 4
666
667 OnigRepeatRange* p;
668
669 if (reg->repeat_range_alloc == 0) {
670 p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
671 CHECK_NULL_RETURN_MEMERR(p);
672 reg->repeat_range = p;
673 reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
674 }
675 else if (reg->repeat_range_alloc <= id) {
676 int n;
677 n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
678 p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
679 sizeof(OnigRepeatRange) * n);
680 CHECK_NULL_RETURN_MEMERR(p);
681 reg->repeat_range = p;
682 reg->repeat_range_alloc = n;
683 }
684 else {
685 p = reg->repeat_range;
686 }
687
688 p[id].lower = lower;
689 p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
690 return 0;
691}
692
693static int
694compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
695 regex_t* reg)
696{
697 int r;
698 int num_repeat = reg->num_repeat;
699
700 r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
701 if (r) return r;
702 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
703 reg->num_repeat++;
704 if (r) return r;
705 r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
706 if (r) return r;
707
708 r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
709 if (r) return r;
710
711 r = compile_tree_empty_check(qn->target, reg, empty_info);
712 if (r) return r;
713
714 if (
715#ifdef USE_SUBEXP_CALL
716 reg->num_call > 0 ||
717#endif
718 IS_QUANTIFIER_IN_REPEAT(qn)) {
719 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
720 }
721 else {
722 r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
723 }
724 if (r) return r;
725 r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
726 return r;
727}
728
729static int
730is_anychar_star_quantifier(QtfrNode* qn)
731{
732 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
733 NTYPE(qn->target) == NT_CANY)
734 return 1;
735 else
736 return 0;
737}
738
739#define QUANTIFIER_EXPAND_LIMIT_SIZE 50
740#define CKN_ON (ckn > 0)
741
742#ifdef USE_COMBINATION_EXPLOSION_CHECK
743
744static int
745compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
746{
747 int len, mod_tlen, cklen;
748 int ckn;
749 int infinite = IS_REPEAT_INFINITE(qn->upper);
750 int empty_info = qn->target_empty_info;
751 int tlen = compile_length_tree(qn->target, reg);
752
753 if (tlen < 0) return tlen;
754
755 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
756
757 cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
758
759 /* anychar repeat */
760 if (NTYPE(qn->target) == NT_CANY) {
761 if (qn->greedy && infinite) {
762 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
763 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
764 else
765 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
766 }
767 }
768
769 if (empty_info != 0)
770 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
771 else
772 mod_tlen = tlen;
773
774 if (infinite && qn->lower <= 1) {
775 if (qn->greedy) {
776 if (qn->lower == 1)
777 len = SIZE_OP_JUMP;
778 else
779 len = 0;
780
781 len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
782 }
783 else {
784 if (qn->lower == 0)
785 len = SIZE_OP_JUMP;
786 else
787 len = 0;
788
789 len += mod_tlen + SIZE_OP_PUSH + cklen;
790 }
791 }
792 else if (qn->upper == 0) {
793 if (qn->is_refered != 0) /* /(?<n>..){0}/ */
794 len = SIZE_OP_JUMP + tlen;
795 else
796 len = 0;
797 }
798 else if (qn->upper == 1 && qn->greedy) {
799 if (qn->lower == 0) {
800 if (CKN_ON) {
801 len = SIZE_OP_STATE_CHECK_PUSH + tlen;
802 }
803 else {
804 len = SIZE_OP_PUSH + tlen;
805 }
806 }
807 else {
808 len = tlen;
809 }
810 }
811 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
812 len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
813 }
814 else {
815 len = SIZE_OP_REPEAT_INC
816 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
817 if (CKN_ON)
818 len += SIZE_OP_STATE_CHECK;
819 }
820
821 return len;
822}
823
824static int
825compile_quantifier_node(QtfrNode* qn, regex_t* reg)
826{
827 int r, mod_tlen;
828 int ckn;
829 int infinite = IS_REPEAT_INFINITE(qn->upper);
830 int empty_info = qn->target_empty_info;
831 int tlen = compile_length_tree(qn->target, reg);
832
833 if (tlen < 0) return tlen;
834
835 ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
836
837 if (is_anychar_star_quantifier(qn)) {
838 r = compile_tree_n_times(qn->target, qn->lower, reg);
839 if (r) return r;
840 if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
841 if (IS_MULTILINE(reg->options))
842 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
843 else
844 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
845 if (r) return r;
846 if (CKN_ON) {
847 r = add_state_check_num(reg, ckn);
848 if (r) return r;
849 }
850
851 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
852 }
853 else {
854 if (IS_MULTILINE(reg->options)) {
855 r = add_opcode(reg, (CKN_ON ?
856 OP_STATE_CHECK_ANYCHAR_ML_STAR
857 : OP_ANYCHAR_ML_STAR));
858 }
859 else {
860 r = add_opcode(reg, (CKN_ON ?
861 OP_STATE_CHECK_ANYCHAR_STAR
862 : OP_ANYCHAR_STAR));
863 }
864 if (r) return r;
865 if (CKN_ON)
866 r = add_state_check_num(reg, ckn);
867
868 return r;
869 }
870 }
871
872 if (empty_info != 0)
873 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
874 else
875 mod_tlen = tlen;
876
877 if (infinite && qn->lower <= 1) {
878 if (qn->greedy) {
879 if (qn->lower == 1) {
880 r = add_opcode_rel_addr(reg, OP_JUMP,
881 (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
882 if (r) return r;
883 }
884
885 if (CKN_ON) {
886 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
887 if (r) return r;
888 r = add_state_check_num(reg, ckn);
889 if (r) return r;
890 r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
891 }
892 else {
893 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
894 }
895 if (r) return r;
896 r = compile_tree_empty_check(qn->target, reg, empty_info);
897 if (r) return r;
898 r = add_opcode_rel_addr(reg, OP_JUMP,
899 -(mod_tlen + (int )SIZE_OP_JUMP
900 + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
901 }
902 else {
903 if (qn->lower == 0) {
904 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
905 if (r) return r;
906 }
907 r = compile_tree_empty_check(qn->target, reg, empty_info);
908 if (r) return r;
909 if (CKN_ON) {
910 r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
911 if (r) return r;
912 r = add_state_check_num(reg, ckn);
913 if (r) return r;
914 r = add_rel_addr(reg,
915 -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
916 }
917 else
918 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
919 }
920 }
921 else if (qn->upper == 0) {
922 if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
923 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
924 if (r) return r;
925 r = compile_tree(qn->target, reg);
926 }
927 else
928 r = 0;
929 }
930 else if (qn->upper == 1 && qn->greedy) {
931 if (qn->lower == 0) {
932 if (CKN_ON) {
933 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
934 if (r) return r;
935 r = add_state_check_num(reg, ckn);
936 if (r) return r;
937 r = add_rel_addr(reg, tlen);
938 }
939 else {
940 r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
941 }
942 if (r) return r;
943 }
944
945 r = compile_tree(qn->target, reg);
946 }
947 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
948 if (CKN_ON) {
949 r = add_opcode(reg, OP_STATE_CHECK_PUSH);
950 if (r) return r;
951 r = add_state_check_num(reg, ckn);
952 if (r) return r;
953 r = add_rel_addr(reg, SIZE_OP_JUMP);
954 }
955 else {
956 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
957 }
958
959 if (r) return r;
960 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
961 if (r) return r;
962 r = compile_tree(qn->target, reg);
963 }
964 else {
965 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
966 if (CKN_ON) {
967 if (r) return r;
968 r = add_opcode(reg, OP_STATE_CHECK);
969 if (r) return r;
970 r = add_state_check_num(reg, ckn);
971 }
972 }
973 return r;
974}
975
976#else /* USE_COMBINATION_EXPLOSION_CHECK */
977
978static int
979compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
980{
981 int len, mod_tlen;
982 int infinite = IS_REPEAT_INFINITE(qn->upper);
983 int empty_info = qn->target_empty_info;
984 int tlen = compile_length_tree(qn->target, reg);
985
986 if (tlen < 0) return tlen;
987
988 /* anychar repeat */
989 if (NTYPE(qn->target) == NT_CANY) {
990 if (qn->greedy && infinite) {
991 if (IS_NOT_NULL(qn->next_head_exact))
992 return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
993 else
994 return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
995 }
996 }
997
998 if (empty_info != 0)
999 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1000 else
1001 mod_tlen = tlen;
1002
1003 if (infinite &&
1004 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1005 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1006 len = SIZE_OP_JUMP;
1007 }
1008 else {
1009 len = tlen * qn->lower;
1010 }
1011
1012 if (qn->greedy) {
1013 if (IS_NOT_NULL(qn->head_exact))
1014 len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
1015 else if (IS_NOT_NULL(qn->next_head_exact))
1016 len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
1017 else
1018 len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
1019 }
1020 else
1021 len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
1022 }
1023 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1024 len = SIZE_OP_JUMP + tlen;
1025 }
1026 else if (!infinite && qn->greedy &&
1027 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1028 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1029 len = tlen * qn->lower;
1030 len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
1031 }
1032 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1033 len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
1034 }
1035 else {
1036 len = SIZE_OP_REPEAT_INC
1037 + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
1038 }
1039
1040 return len;
1041}
1042
1043static int
1044compile_quantifier_node(QtfrNode* qn, regex_t* reg)
1045{
1046 int i, r, mod_tlen;
1047 int infinite = IS_REPEAT_INFINITE(qn->upper);
1048 int empty_info = qn->target_empty_info;
1049 int tlen = compile_length_tree(qn->target, reg);
1050
1051 if (tlen < 0) return tlen;
1052
1053 if (is_anychar_star_quantifier(qn)) {
1054 r = compile_tree_n_times(qn->target, qn->lower, reg);
1055 if (r) return r;
1056 if (IS_NOT_NULL(qn->next_head_exact)) {
1057 if (IS_MULTILINE(reg->options))
1058 r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
1059 else
1060 r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
1061 if (r) return r;
1062 return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1063 }
1064 else {
1065 if (IS_MULTILINE(reg->options))
1066 return add_opcode(reg, OP_ANYCHAR_ML_STAR);
1067 else
1068 return add_opcode(reg, OP_ANYCHAR_STAR);
1069 }
1070 }
1071
1072 if (empty_info != 0)
1073 mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
1074 else
1075 mod_tlen = tlen;
1076
1077 if (infinite &&
1078 (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1079 if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
1080 if (qn->greedy) {
1081 if (IS_NOT_NULL(qn->head_exact))
1082 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
1083 else if (IS_NOT_NULL(qn->next_head_exact))
1084 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
1085 else
1086 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
1087 }
1088 else {
1089 r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
1090 }
1091 if (r) return r;
1092 }
1093 else {
1094 r = compile_tree_n_times(qn->target, qn->lower, reg);
1095 if (r) return r;
1096 }
1097
1098 if (qn->greedy) {
1099 if (IS_NOT_NULL(qn->head_exact)) {
1100 r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
1101 mod_tlen + SIZE_OP_JUMP);
1102 if (r) return r;
1103 add_bytes(reg, NSTR(qn->head_exact)->s, 1);
1104 r = compile_tree_empty_check(qn->target, reg, empty_info);
1105 if (r) return r;
1106 r = add_opcode_rel_addr(reg, OP_JUMP,
1107 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
1108 }
1109 else if (IS_NOT_NULL(qn->next_head_exact)) {
1110 r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
1111 mod_tlen + SIZE_OP_JUMP);
1112 if (r) return r;
1113 add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
1114 r = compile_tree_empty_check(qn->target, reg, empty_info);
1115 if (r) return r;
1116 r = add_opcode_rel_addr(reg, OP_JUMP,
1117 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
1118 }
1119 else {
1120 r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
1121 if (r) return r;
1122 r = compile_tree_empty_check(qn->target, reg, empty_info);
1123 if (r) return r;
1124 r = add_opcode_rel_addr(reg, OP_JUMP,
1125 -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
1126 }
1127 }
1128 else {
1129 r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
1130 if (r) return r;
1131 r = compile_tree_empty_check(qn->target, reg, empty_info);
1132 if (r) return r;
1133 r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
1134 }
1135 }
1136 else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
1137 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1138 if (r) return r;
1139 r = compile_tree(qn->target, reg);
1140 }
1141 else if (!infinite && qn->greedy &&
1142 (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
1143 <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
1144 int n = qn->upper - qn->lower;
1145
1146 r = compile_tree_n_times(qn->target, qn->lower, reg);
1147 if (r) return r;
1148
1149 for (i = 0; i < n; i++) {
1150 r = add_opcode_rel_addr(reg, OP_PUSH,
1151 (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
1152 if (r) return r;
1153 r = compile_tree(qn->target, reg);
1154 if (r) return r;
1155 }
1156 }
1157 else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
1158 r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
1159 if (r) return r;
1160 r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
1161 if (r) return r;
1162 r = compile_tree(qn->target, reg);
1163 }
1164 else {
1165 r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
1166 }
1167 return r;
1168}
1169#endif /* USE_COMBINATION_EXPLOSION_CHECK */
1170
1171static int
1172compile_length_option_node(EncloseNode* node, regex_t* reg)
1173{
1174 int tlen;
1175 OnigOptionType prev = reg->options;
1176
1177 reg->options = node->option;
1178 tlen = compile_length_tree(node->target, reg);
1179 reg->options = prev;
1180
1181 if (tlen < 0) return tlen;
1182
1183 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1184 return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
1185 + tlen + SIZE_OP_SET_OPTION;
1186 }
1187 else
1188 return tlen;
1189}
1190
1191static int
1192compile_option_node(EncloseNode* node, regex_t* reg)
1193{
1194 int r;
1195 OnigOptionType prev = reg->options;
1196
1197 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1198 r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
1199 if (r) return r;
1200 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1201 if (r) return r;
1202 r = add_opcode(reg, OP_FAIL);
1203 if (r) return r;
1204 }
1205
1206 reg->options = node->option;
1207 r = compile_tree(node->target, reg);
1208 reg->options = prev;
1209
1210 if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
1211 if (r) return r;
1212 r = add_opcode_option(reg, OP_SET_OPTION, prev);
1213 }
1214 return r;
1215}
1216
1217static int
1218compile_length_enclose_node(EncloseNode* node, regex_t* reg)
1219{
1220 int len;
1221 int tlen;
1222
1223 if (node->type == ENCLOSE_OPTION)
1224 return compile_length_option_node(node, reg);
1225
1226 if (node->target) {
1227 tlen = compile_length_tree(node->target, reg);
1228 if (tlen < 0) return tlen;
1229 }
1230 else
1231 tlen = 0;
1232
1233 switch (node->type) {
1234 case ENCLOSE_MEMORY:
1235#ifdef USE_SUBEXP_CALL
1236 if (IS_ENCLOSE_CALLED(node)) {
1237 len = SIZE_OP_MEMORY_START_PUSH + tlen
1238 + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
1239 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1240 len += (IS_ENCLOSE_RECURSION(node)
1241 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1242 else
1243 len += (IS_ENCLOSE_RECURSION(node)
1244 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1245 }
1246 else
1247#endif
1248 {
1249 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1250 len = SIZE_OP_MEMORY_START_PUSH;
1251 else
1252 len = SIZE_OP_MEMORY_START;
1253
1254 len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
1255 ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
1256 }
1257 break;
1258
1259 case ENCLOSE_STOP_BACKTRACK:
1260 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1261 QtfrNode* qn = NQTFR(node->target);
1262 tlen = compile_length_tree(qn->target, reg);
1263 if (tlen < 0) return tlen;
1264
1265 len = tlen * qn->lower
1266 + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
1267 }
1268 else {
1269 len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
1270 }
1271 break;
1272
1273 case ENCLOSE_CONDITION:
1274 len = SIZE_OP_CONDITION;
1275 if (NTYPE(node->target) == NT_ALT) {
1276 Node* x = node->target;
1277
1278 tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
1279 if (tlen < 0) return tlen;
1280 len += tlen + SIZE_OP_JUMP;
1281 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1282 x = NCDR(x);
1283 tlen = compile_length_tree(NCAR(x), reg); /* no-node */
1284 if (tlen < 0) return tlen;
1285 len += tlen;
1286 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1287 }
1288 else {
1289 return ONIGERR_PARSER_BUG;
1290 }
1291 break;
1292
1293 default:
1294 return ONIGERR_TYPE_BUG;
1295 break;
1296 }
1297
1298 return len;
1299}
1300
1301static int get_char_length_tree(Node* node, regex_t* reg, int* len);
1302
1303static int
1304compile_enclose_node(EncloseNode* node, regex_t* reg)
1305{
1306 int r, len;
1307
1308 if (node->type == ENCLOSE_OPTION)
1309 return compile_option_node(node, reg);
1310
1311 switch (node->type) {
1312 case ENCLOSE_MEMORY:
1313#ifdef USE_SUBEXP_CALL
1314 if (IS_ENCLOSE_CALLED(node)) {
1315 r = add_opcode(reg, OP_CALL);
1316 if (r) return r;
1317 node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
1318 node->state |= NST_ADDR_FIXED;
1319 r = add_abs_addr(reg, (int )node->call_addr);
1320 if (r) return r;
1321 len = compile_length_tree(node->target, reg);
1322 len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
1323 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1324 len += (IS_ENCLOSE_RECURSION(node)
1325 ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
1326 else
1327 len += (IS_ENCLOSE_RECURSION(node)
1328 ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
1329
1330 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1331 if (r) return r;
1332 }
1333#endif
1334 if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
1335 r = add_opcode(reg, OP_MEMORY_START_PUSH);
1336 else
1337 r = add_opcode(reg, OP_MEMORY_START);
1338 if (r) return r;
1339 r = add_mem_num(reg, node->regnum);
1340 if (r) return r;
1341 r = compile_tree(node->target, reg);
1342 if (r) return r;
1343#ifdef USE_SUBEXP_CALL
1344 if (IS_ENCLOSE_CALLED(node)) {
1345 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1346 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1347 ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
1348 else
1349 r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
1350 ? OP_MEMORY_END_REC : OP_MEMORY_END));
1351
1352 if (r) return r;
1353 r = add_mem_num(reg, node->regnum);
1354 if (r) return r;
1355 r = add_opcode(reg, OP_RETURN);
1356 }
1357 else
1358#endif
1359 {
1360 if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
1361 r = add_opcode(reg, OP_MEMORY_END_PUSH);
1362 else
1363 r = add_opcode(reg, OP_MEMORY_END);
1364 if (r) return r;
1365 r = add_mem_num(reg, node->regnum);
1366 }
1367 break;
1368
1369 case ENCLOSE_STOP_BACKTRACK:
1370 if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
1371 QtfrNode* qn = NQTFR(node->target);
1372 r = compile_tree_n_times(qn->target, qn->lower, reg);
1373 if (r) return r;
1374
1375 len = compile_length_tree(qn->target, reg);
1376 if (len < 0) return len;
1377
1378 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
1379 if (r) return r;
1380 r = compile_tree(qn->target, reg);
1381 if (r) return r;
1382 r = add_opcode(reg, OP_POP);
1383 if (r) return r;
1384 r = add_opcode_rel_addr(reg, OP_JUMP,
1385 -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
1386 }
1387 else {
1388 r = add_opcode(reg, OP_PUSH_STOP_BT);
1389 if (r) return r;
1390 r = compile_tree(node->target, reg);
1391 if (r) return r;
1392 r = add_opcode(reg, OP_POP_STOP_BT);
1393 }
1394 break;
1395
1396 case ENCLOSE_CONDITION:
1397 r = add_opcode(reg, OP_CONDITION);
1398 if (r) return r;
1399 r = add_mem_num(reg, node->regnum);
1400 if (r) return r;
1401
1402 if (NTYPE(node->target) == NT_ALT) {
1403 Node* x = node->target;
1404 int len2;
1405
1406 len = compile_length_tree(NCAR(x), reg); /* yes-node */
1407 if (len < 0) return len;
1408 if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
1409 x = NCDR(x);
1410 len2 = compile_length_tree(NCAR(x), reg); /* no-node */
1411 if (len2 < 0) return len2;
1412 if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
1413
1414 x = node->target;
1415 r = add_rel_addr(reg, len + SIZE_OP_JUMP);
1416 if (r) return r;
1417 r = compile_tree(NCAR(x), reg); /* yes-node */
1418 if (r) return r;
1419 r = add_opcode_rel_addr(reg, OP_JUMP, len2);
1420 if (r) return r;
1421 x = NCDR(x);
1422 r = compile_tree(NCAR(x), reg); /* no-node */
1423 }
1424 else {
1425 return ONIGERR_PARSER_BUG;
1426 }
1427 break;
1428
1429 default:
1430 return ONIGERR_TYPE_BUG;
1431 break;
1432 }
1433
1434 return r;
1435}
1436
1437static int
1438compile_length_anchor_node(AnchorNode* node, regex_t* reg)
1439{
1440 int len;
1441 int tlen = 0;
1442
1443 if (node->target) {
1444 tlen = compile_length_tree(node->target, reg);
1445 if (tlen < 0) return tlen;
1446 }
1447
1448 switch (node->type) {
1449 case ANCHOR_PREC_READ:
1450 len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
1451 break;
1452 case ANCHOR_PREC_READ_NOT:
1453 len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
1454 break;
1455 case ANCHOR_LOOK_BEHIND:
1456 len = SIZE_OP_LOOK_BEHIND + tlen;
1457 break;
1458 case ANCHOR_LOOK_BEHIND_NOT:
1459 len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
1460 break;
1461
1462 default:
1463 len = SIZE_OPCODE;
1464 break;
1465 }
1466
1467 return len;
1468}
1469
1470static int
1471compile_anchor_node(AnchorNode* node, regex_t* reg)
1472{
1473 int r, len;
1474
1475 switch (node->type) {
1476 case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
1477 case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
1478 case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
1479 case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
1480 case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
1481 case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
1482
1483 /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
1484 case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
1485
1486 case ANCHOR_WORD_BOUND:
1487 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
1488 else r = add_opcode(reg, OP_WORD_BOUND);
1489 break;
1490 case ANCHOR_NOT_WORD_BOUND:
1491 if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
1492 else r = add_opcode(reg, OP_NOT_WORD_BOUND);
1493 break;
1494#ifdef USE_WORD_BEGIN_END
1495 case ANCHOR_WORD_BEGIN:
1496 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
1497 else r = add_opcode(reg, OP_WORD_BEGIN);
1498 break;
1499 case ANCHOR_WORD_END:
1500 if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
1501 else r = add_opcode(reg, OP_WORD_END);
1502 break;
1503#endif
1504 case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
1505
1506 case ANCHOR_PREC_READ:
1507 r = add_opcode(reg, OP_PUSH_POS);
1508 if (r) return r;
1509 r = compile_tree(node->target, reg);
1510 if (r) return r;
1511 r = add_opcode(reg, OP_POP_POS);
1512 break;
1513
1514 case ANCHOR_PREC_READ_NOT:
1515 len = compile_length_tree(node->target, reg);
1516 if (len < 0) return len;
1517 r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
1518 if (r) return r;
1519 r = compile_tree(node->target, reg);
1520 if (r) return r;
1521 r = add_opcode(reg, OP_FAIL_POS);
1522 break;
1523
1524 case ANCHOR_LOOK_BEHIND:
1525 {
1526 int n;
1527 r = add_opcode(reg, OP_LOOK_BEHIND);
1528 if (r) return r;
1529 if (node->char_len < 0) {
1530 r = get_char_length_tree(node->target, reg, &n);
1531 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1532 }
1533 else
1534 n = node->char_len;
1535 r = add_length(reg, n);
1536 if (r) return r;
1537 r = compile_tree(node->target, reg);
1538 }
1539 break;
1540
1541 case ANCHOR_LOOK_BEHIND_NOT:
1542 {
1543 int n;
1544 len = compile_length_tree(node->target, reg);
1545 r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
1546 len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
1547 if (r) return r;
1548 if (node->char_len < 0) {
1549 r = get_char_length_tree(node->target, reg, &n);
1550 if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
1551 }
1552 else
1553 n = node->char_len;
1554 r = add_length(reg, n);
1555 if (r) return r;
1556 r = compile_tree(node->target, reg);
1557 if (r) return r;
1558 r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
1559 }
1560 break;
1561
1562 default:
1563 return ONIGERR_TYPE_BUG;
1564 break;
1565 }
1566
1567 return r;
1568}
1569
1570static int
1571compile_length_tree(Node* node, regex_t* reg)
1572{
1573 int len, type, r;
1574
1575 type = NTYPE(node);
1576 switch (type) {
1577 case NT_LIST:
1578 len = 0;
1579 do {
1580 r = compile_length_tree(NCAR(node), reg);
1581 if (r < 0) return r;
1582 len += r;
1583 } while (IS_NOT_NULL(node = NCDR(node)));
1584 r = len;
1585 break;
1586
1587 case NT_ALT:
1588 {
1589 int n = 0;
1590 len = 0;
1591 do {
1592 r = compile_length_tree(NCAR(node), reg);
1593 if (r < 0) return r;
1594 len += r;
1595 n++;
1596 } while (IS_NOT_NULL(node = NCDR(node)));
1597 r = len;
1598 r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
1599 }
1600 break;
1601
1602 case NT_STR:
1603 if (NSTRING_IS_RAW(node))
1604 r = compile_length_string_raw_node(NSTR(node), reg);
1605 else
1606 r = compile_length_string_node(node, reg);
1607 break;
1608
1609 case NT_CCLASS:
1610 r = compile_length_cclass_node(NCCLASS(node), reg);
1611 break;
1612
1613 case NT_CTYPE:
1614 case NT_CANY:
1615 r = SIZE_OPCODE;
1616 break;
1617
1618 case NT_BREF:
1619 {
1620 BRefNode* br = NBREF(node);
1621
1622#ifdef USE_BACKREF_WITH_LEVEL
1623 if (IS_BACKREF_NEST_LEVEL(br)) {
1624 r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
1625 SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1626 }
1627 else
1628#endif
1629 if (br->back_num == 1) {
1630 r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
1631 ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
1632 }
1633 else {
1634 r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
1635 }
1636 }
1637 break;
1638
1639#ifdef USE_SUBEXP_CALL
1640 case NT_CALL:
1641 r = SIZE_OP_CALL;
1642 break;
1643#endif
1644
1645 case NT_QTFR:
1646 r = compile_length_quantifier_node(NQTFR(node), reg);
1647 break;
1648
1649 case NT_ENCLOSE:
1650 r = compile_length_enclose_node(NENCLOSE(node), reg);
1651 break;
1652
1653 case NT_ANCHOR:
1654 r = compile_length_anchor_node(NANCHOR(node), reg);
1655 break;
1656
1657 default:
1658 return ONIGERR_TYPE_BUG;
1659 break;
1660 }
1661
1662 return r;
1663}
1664
1665static int
1666compile_tree(Node* node, regex_t* reg)
1667{
1668 int n, type, len, pos, r = 0;
1669
1670 type = NTYPE(node);
1671 switch (type) {
1672 case NT_LIST:
1673 do {
1674 r = compile_tree(NCAR(node), reg);
1675 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1676 break;
1677
1678 case NT_ALT:
1679 {
1680 Node* x = node;
1681 len = 0;
1682 do {
1683 len += compile_length_tree(NCAR(x), reg);
1684 if (NCDR(x) != NULL) {
1685 len += SIZE_OP_PUSH + SIZE_OP_JUMP;
1686 }
1687 } while (IS_NOT_NULL(x = NCDR(x)));
1688 pos = reg->used + len; /* goal position */
1689
1690 do {
1691 len = compile_length_tree(NCAR(node), reg);
1692 if (IS_NOT_NULL(NCDR(node))) {
1693 r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
1694 if (r) break;
1695 }
1696 r = compile_tree(NCAR(node), reg);
1697 if (r) break;
1698 if (IS_NOT_NULL(NCDR(node))) {
1699 len = pos - (reg->used + SIZE_OP_JUMP);
1700 r = add_opcode_rel_addr(reg, OP_JUMP, len);
1701 if (r) break;
1702 }
1703 } while (IS_NOT_NULL(node = NCDR(node)));
1704 }
1705 break;
1706
1707 case NT_STR:
1708 if (NSTRING_IS_RAW(node))
1709 r = compile_string_raw_node(NSTR(node), reg);
1710 else
1711 r = compile_string_node(node, reg);
1712 break;
1713
1714 case NT_CCLASS:
1715 r = compile_cclass_node(NCCLASS(node), reg);
1716 break;
1717
1718 case NT_CTYPE:
1719 {
1720 int op;
1721
1722 switch (NCTYPE(node)->ctype) {
1723 case ONIGENC_CTYPE_WORD:
1724 if (NCTYPE(node)->ascii_range != 0) {
1725 if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
1726 else op = OP_ASCII_WORD;
1727 }
1728 else {
1729 if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
1730 else op = OP_WORD;
1731 }
1732 break;
1733 default:
1734 return ONIGERR_TYPE_BUG;
1735 break;
1736 }
1737 r = add_opcode(reg, op);
1738 }
1739 break;
1740
1741 case NT_CANY:
1742 if (IS_MULTILINE(reg->options))
1743 r = add_opcode(reg, OP_ANYCHAR_ML);
1744 else
1745 r = add_opcode(reg, OP_ANYCHAR);
1746 break;
1747
1748 case NT_BREF:
1749 {
1750 BRefNode* br = NBREF(node);
1751
1752#ifdef USE_BACKREF_WITH_LEVEL
1753 if (IS_BACKREF_NEST_LEVEL(br)) {
1754 r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
1755 if (r) return r;
1756 r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
1757 if (r) return r;
1758 r = add_length(reg, br->nest_level);
1759 if (r) return r;
1760
1761 goto add_bacref_mems;
1762 }
1763 else
1764#endif
1765 if (br->back_num == 1) {
1766 n = br->back_static[0];
1767 if (IS_IGNORECASE(reg->options)) {
1768 r = add_opcode(reg, OP_BACKREFN_IC);
1769 if (r) return r;
1770 r = add_mem_num(reg, n);
1771 }
1772 else {
1773 switch (n) {
1774 case 1: r = add_opcode(reg, OP_BACKREF1); break;
1775 case 2: r = add_opcode(reg, OP_BACKREF2); break;
1776 default:
1777 r = add_opcode(reg, OP_BACKREFN);
1778 if (r) return r;
1779 r = add_mem_num(reg, n);
1780 break;
1781 }
1782 }
1783 }
1784 else {
1785 int i;
1786 int* p;
1787
1788 if (IS_IGNORECASE(reg->options)) {
1789 r = add_opcode(reg, OP_BACKREF_MULTI_IC);
1790 }
1791 else {
1792 r = add_opcode(reg, OP_BACKREF_MULTI);
1793 }
1794 if (r) return r;
1795
1796#ifdef USE_BACKREF_WITH_LEVEL
1797 add_bacref_mems:
1798#endif
1799 r = add_length(reg, br->back_num);
1800 if (r) return r;
1801 p = BACKREFS_P(br);
1802 for (i = br->back_num - 1; i >= 0; i--) {
1803 r = add_mem_num(reg, p[i]);
1804 if (r) return r;
1805 }
1806 }
1807 }
1808 break;
1809
1810#ifdef USE_SUBEXP_CALL
1811 case NT_CALL:
1812 r = compile_call(NCALL(node), reg);
1813 break;
1814#endif
1815
1816 case NT_QTFR:
1817 r = compile_quantifier_node(NQTFR(node), reg);
1818 break;
1819
1820 case NT_ENCLOSE:
1821 r = compile_enclose_node(NENCLOSE(node), reg);
1822 break;
1823
1824 case NT_ANCHOR:
1825 r = compile_anchor_node(NANCHOR(node), reg);
1826 break;
1827
1828 default:
1829#ifdef ONIG_DEBUG
1830 fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
1831#endif
1832 break;
1833 }
1834
1835 return r;
1836}
1837
1838#ifdef USE_NAMED_GROUP
1839
1840static int
1841noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
1842{
1843 int r = 0;
1844 Node* node = *plink;
1845
1846 switch (NTYPE(node)) {
1847 case NT_LIST:
1848 case NT_ALT:
1849 do {
1850 r = noname_disable_map(&(NCAR(node)), map, counter);
1851 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1852 break;
1853
1854 case NT_QTFR:
1855 {
1856 Node** ptarget = &(NQTFR(node)->target);
1857 Node* old = *ptarget;
1858 r = noname_disable_map(ptarget, map, counter);
1859 if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
1860 onig_reduce_nested_quantifier(node, *ptarget);
1861 }
1862 }
1863 break;
1864
1865 case NT_ENCLOSE:
1866 {
1867 EncloseNode* en = NENCLOSE(node);
1868 if (en->type == ENCLOSE_MEMORY) {
1869 if (IS_ENCLOSE_NAMED_GROUP(en)) {
1870 (*counter)++;
1871 map[en->regnum].new_val = *counter;
1872 en->regnum = *counter;
1873 }
1874 else if (en->regnum != 0) {
1875 *plink = en->target;
1876 en->target = NULL_NODE;
1877 onig_node_free(node);
1878 r = noname_disable_map(plink, map, counter);
1879 break;
1880 }
1881 }
1882 r = noname_disable_map(&(en->target), map, counter);
1883 }
1884 break;
1885
1886 case NT_ANCHOR:
1887 {
1888 AnchorNode* an = NANCHOR(node);
1889 switch (an->type) {
1890 case ANCHOR_PREC_READ:
1891 case ANCHOR_PREC_READ_NOT:
1892 case ANCHOR_LOOK_BEHIND:
1893 case ANCHOR_LOOK_BEHIND_NOT:
1894 r = noname_disable_map(&(an->target), map, counter);
1895 break;
1896 }
1897 }
1898 break;
1899
1900 default:
1901 break;
1902 }
1903
1904 return r;
1905}
1906
1907static int
1908renumber_node_backref(Node* node, GroupNumRemap* map)
1909{
1910 int i, pos, n, old_num;
1911 int *backs;
1912 BRefNode* bn = NBREF(node);
1913
1914 if (! IS_BACKREF_NAME_REF(bn))
1915 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
1916
1917 old_num = bn->back_num;
1918 if (IS_NULL(bn->back_dynamic))
1919 backs = bn->back_static;
1920 else
1921 backs = bn->back_dynamic;
1922
1923 for (i = 0, pos = 0; i < old_num; i++) {
1924 n = map[backs[i]].new_val;
1925 if (n > 0) {
1926 backs[pos] = n;
1927 pos++;
1928 }
1929 }
1930
1931 bn->back_num = pos;
1932 return 0;
1933}
1934
1935static int
1936renumber_by_map(Node* node, GroupNumRemap* map)
1937{
1938 int r = 0;
1939
1940 switch (NTYPE(node)) {
1941 case NT_LIST:
1942 case NT_ALT:
1943 do {
1944 r = renumber_by_map(NCAR(node), map);
1945 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1946 break;
1947 case NT_QTFR:
1948 r = renumber_by_map(NQTFR(node)->target, map);
1949 break;
1950 case NT_ENCLOSE:
1951 {
1952 EncloseNode* en = NENCLOSE(node);
1953 if (en->type == ENCLOSE_CONDITION)
1954 en->regnum = map[en->regnum].new_val;
1955 r = renumber_by_map(en->target, map);
1956 }
1957 break;
1958
1959 case NT_BREF:
1960 r = renumber_node_backref(node, map);
1961 break;
1962
1963 case NT_ANCHOR:
1964 {
1965 AnchorNode* an = NANCHOR(node);
1966 switch (an->type) {
1967 case ANCHOR_PREC_READ:
1968 case ANCHOR_PREC_READ_NOT:
1969 case ANCHOR_LOOK_BEHIND:
1970 case ANCHOR_LOOK_BEHIND_NOT:
1971 r = renumber_by_map(an->target, map);
1972 break;
1973 }
1974 }
1975 break;
1976
1977 default:
1978 break;
1979 }
1980
1981 return r;
1982}
1983
1984static int
1985numbered_ref_check(Node* node)
1986{
1987 int r = 0;
1988
1989 switch (NTYPE(node)) {
1990 case NT_LIST:
1991 case NT_ALT:
1992 do {
1993 r = numbered_ref_check(NCAR(node));
1994 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
1995 break;
1996 case NT_QTFR:
1997 r = numbered_ref_check(NQTFR(node)->target);
1998 break;
1999 case NT_ENCLOSE:
2000 r = numbered_ref_check(NENCLOSE(node)->target);
2001 break;
2002
2003 case NT_BREF:
2004 if (! IS_BACKREF_NAME_REF(NBREF(node)))
2005 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
2006 break;
2007
2008 default:
2009 break;
2010 }
2011
2012 return r;
2013}
2014
2015static int
2016disable_noname_group_capture_(Node** root, regex_t* reg, ScanEnv* env, GroupNumRemap* map)
2017{
2018 int r, i, pos, counter;
2019 BitStatusType loc;
2020
2021 CHECK_NULL_RETURN_MEMERR(map);
2022 for (i = 1; i <= env->num_mem; i++) {
2023 map[i].new_val = 0;
2024 }
2025 counter = 0;
2026 r = noname_disable_map(root, map, &counter);
2027 if (r != 0) return r;
2028
2029 r = renumber_by_map(*root, map);
2030 if (r != 0) return r;
2031
2032 for (i = 1, pos = 1; i <= env->num_mem; i++) {
2033 if (map[i].new_val > 0) {
2034 SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
2035 pos++;
2036 }
2037 }
2038
2039 loc = env->capture_history;
2040 BIT_STATUS_CLEAR(env->capture_history);
2041 for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
2042 if (BIT_STATUS_AT(loc, i)) {
2043 BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
2044 }
2045 }
2046
2047 env->num_mem = env->num_named;
2048 reg->num_mem = env->num_named;
2049
2050 return onig_renumber_name_table(reg, map);
2051}
2052
2053static int
2054disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
2055{
2056 GroupNumRemap* map = (GroupNumRemap*)xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
2057 int ret = disable_noname_group_capture_(root, reg, env, map);
2058 xfree(map);
2059 return ret;
2060}
2061#endif /* USE_NAMED_GROUP */
2062
2063#ifdef USE_SUBEXP_CALL
2064static int
2065unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
2066{
2067 int i, offset;
2068 EncloseNode* en;
2069 AbsAddrType addr;
2070
2071 for (i = 0; i < uslist->num; i++) {
2072 en = NENCLOSE(uslist->us[i].target);
2073 if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
2074 addr = en->call_addr;
2075 offset = uslist->us[i].offset;
2076
2077 BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
2078 }
2079 return 0;
2080}
2081#endif
2082
2083#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2084static int
2085quantifiers_memory_node_info(Node* node)
2086{
2087 int r = 0;
2088
2089 switch (NTYPE(node)) {
2090 case NT_LIST:
2091 case NT_ALT:
2092 {
2093 int v;
2094 do {
2095 v = quantifiers_memory_node_info(NCAR(node));
2096 if (v > r) r = v;
2097 } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
2098 }
2099 break;
2100
2101#ifdef USE_SUBEXP_CALL
2102 case NT_CALL:
2103 if (IS_CALL_RECURSION(NCALL(node))) {
2104 return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
2105 }
2106 else
2107 r = quantifiers_memory_node_info(NCALL(node)->target);
2108 break;
2109#endif
2110
2111 case NT_QTFR:
2112 {
2113 QtfrNode* qn = NQTFR(node);
2114 if (qn->upper != 0) {
2115 r = quantifiers_memory_node_info(qn->target);
2116 }
2117 }
2118 break;
2119
2120 case NT_ENCLOSE:
2121 {
2122 EncloseNode* en = NENCLOSE(node);
2123 switch (en->type) {
2124 case ENCLOSE_MEMORY:
2125 return NQ_TARGET_IS_EMPTY_MEM;
2126 break;
2127
2128 case ENCLOSE_OPTION:
2129 case ENCLOSE_STOP_BACKTRACK:
2130 case ENCLOSE_CONDITION:
2131 r = quantifiers_memory_node_info(en->target);
2132 break;
2133 default:
2134 break;
2135 }
2136 }
2137 break;
2138
2139 case NT_BREF:
2140 case NT_STR:
2141 case NT_CTYPE:
2142 case NT_CCLASS:
2143 case NT_CANY:
2144 case NT_ANCHOR:
2145 default:
2146 break;
2147 }
2148
2149 return r;
2150}
2151#endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
2152
2153static int
2154get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
2155{
2156 OnigDistance tmin;
2157 int r = 0;
2158
2159 *min = 0;
2160 switch (NTYPE(node)) {
2161 case NT_BREF:
2162 {
2163 int i;
2164 int* backs;
2165 Node** nodes = SCANENV_MEM_NODES(env);
2166 BRefNode* br = NBREF(node);
2167 if (br->state & NST_RECURSION) break;
2168
2169 backs = BACKREFS_P(br);
2170 if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2171 r = get_min_match_length(nodes[backs[0]], min, env);
2172 if (r != 0) break;
2173 for (i = 1; i < br->back_num; i++) {
2174 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2175 r = get_min_match_length(nodes[backs[i]], &tmin, env);
2176 if (r != 0) break;
2177 if (*min > tmin) *min = tmin;
2178 }
2179 }
2180 break;
2181
2182#ifdef USE_SUBEXP_CALL
2183 case NT_CALL:
2184 if (IS_CALL_RECURSION(NCALL(node))) {
2185 EncloseNode* en = NENCLOSE(NCALL(node)->target);
2186 if (IS_ENCLOSE_MIN_FIXED(en))
2187 *min = en->min_len;
2188 }
2189 else
2190 r = get_min_match_length(NCALL(node)->target, min, env);
2191 break;
2192#endif
2193
2194 case NT_LIST:
2195 do {
2196 r = get_min_match_length(NCAR(node), &tmin, env);
2197 if (r == 0) *min += tmin;
2198 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2199 break;
2200
2201 case NT_ALT:
2202 {
2203 Node *x, *y;
2204 y = node;
2205 do {
2206 x = NCAR(y);
2207 r = get_min_match_length(x, &tmin, env);
2208 if (r != 0) break;
2209 if (y == node) *min = tmin;
2210 else if (*min > tmin) *min = tmin;
2211 } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
2212 }
2213 break;
2214
2215 case NT_STR:
2216 {
2217 StrNode* sn = NSTR(node);
2218 *min = sn->end - sn->s;
2219 }
2220 break;
2221
2222 case NT_CTYPE:
2223 *min = 1;
2224 break;
2225
2226 case NT_CCLASS:
2227 case NT_CANY:
2228 *min = 1;
2229 break;
2230
2231 case NT_QTFR:
2232 {
2233 QtfrNode* qn = NQTFR(node);
2234
2235 if (qn->lower > 0) {
2236 r = get_min_match_length(qn->target, min, env);
2237 if (r == 0)
2238 *min = distance_multiply(*min, qn->lower);
2239 }
2240 }
2241 break;
2242
2243 case NT_ENCLOSE:
2244 {
2245 EncloseNode* en = NENCLOSE(node);
2246 switch (en->type) {
2247 case ENCLOSE_MEMORY:
2248#ifdef USE_SUBEXP_CALL
2249 if (IS_ENCLOSE_MIN_FIXED(en))
2250 *min = en->min_len;
2251 else {
2252 r = get_min_match_length(en->target, min, env);
2253 if (r == 0) {
2254 en->min_len = *min;
2255 SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
2256 }
2257 }
2258 break;
2259#endif
2260 case ENCLOSE_OPTION:
2261 case ENCLOSE_STOP_BACKTRACK:
2262 case ENCLOSE_CONDITION:
2263 r = get_min_match_length(en->target, min, env);
2264 break;
2265 }
2266 }
2267 break;
2268
2269 case NT_ANCHOR:
2270 default:
2271 break;
2272 }
2273
2274 return r;
2275}
2276
2277static int
2278get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
2279{
2280 OnigDistance tmax;
2281 int r = 0;
2282
2283 *max = 0;
2284 switch (NTYPE(node)) {
2285 case NT_LIST:
2286 do {
2287 r = get_max_match_length(NCAR(node), &tmax, env);
2288 if (r == 0)
2289 *max = distance_add(*max, tmax);
2290 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2291 break;
2292
2293 case NT_ALT:
2294 do {
2295 r = get_max_match_length(NCAR(node), &tmax, env);
2296 if (r == 0 && *max < tmax) *max = tmax;
2297 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2298 break;
2299
2300 case NT_STR:
2301 {
2302 StrNode* sn = NSTR(node);
2303 *max = sn->end - sn->s;
2304 }
2305 break;
2306
2307 case NT_CTYPE:
2308 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2309 break;
2310
2311 case NT_CCLASS:
2312 case NT_CANY:
2313 *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
2314 break;
2315
2316 case NT_BREF:
2317 {
2318 int i;
2319 int* backs;
2320 Node** nodes = SCANENV_MEM_NODES(env);
2321 BRefNode* br = NBREF(node);
2322 if (br->state & NST_RECURSION) {
2323 *max = ONIG_INFINITE_DISTANCE;
2324 break;
2325 }
2326 backs = BACKREFS_P(br);
2327 for (i = 0; i < br->back_num; i++) {
2328 if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
2329 r = get_max_match_length(nodes[backs[i]], &tmax, env);
2330 if (r != 0) break;
2331 if (*max < tmax) *max = tmax;
2332 }
2333 }
2334 break;
2335
2336#ifdef USE_SUBEXP_CALL
2337 case NT_CALL:
2338 if (! IS_CALL_RECURSION(NCALL(node)))
2339 r = get_max_match_length(NCALL(node)->target, max, env);
2340 else
2341 *max = ONIG_INFINITE_DISTANCE;
2342 break;
2343#endif
2344
2345 case NT_QTFR:
2346 {
2347 QtfrNode* qn = NQTFR(node);
2348
2349 if (qn->upper != 0) {
2350 r = get_max_match_length(qn->target, max, env);
2351 if (r == 0 && *max != 0) {
2352 if (! IS_REPEAT_INFINITE(qn->upper))
2353 *max = distance_multiply(*max, qn->upper);
2354 else
2355 *max = ONIG_INFINITE_DISTANCE;
2356 }
2357 }
2358 }
2359 break;
2360
2361 case NT_ENCLOSE:
2362 {
2363 EncloseNode* en = NENCLOSE(node);
2364 switch (en->type) {
2365 case ENCLOSE_MEMORY:
2366#ifdef USE_SUBEXP_CALL
2367 if (IS_ENCLOSE_MAX_FIXED(en))
2368 *max = en->max_len;
2369 else {
2370 r = get_max_match_length(en->target, max, env);
2371 if (r == 0) {
2372 en->max_len = *max;
2373 SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
2374 }
2375 }
2376 break;
2377#endif
2378 case ENCLOSE_OPTION:
2379 case ENCLOSE_STOP_BACKTRACK:
2380 case ENCLOSE_CONDITION:
2381 r = get_max_match_length(en->target, max, env);
2382 break;
2383 }
2384 }
2385 break;
2386
2387 case NT_ANCHOR:
2388 default:
2389 break;
2390 }
2391
2392 return r;
2393}
2394
2395#define GET_CHAR_LEN_VARLEN -1
2396#define GET_CHAR_LEN_TOP_ALT_VARLEN -2
2397
2398/* fixed size pattern node only */
2399static int
2400get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
2401{
2402 int tlen;
2403 int r = 0;
2404
2405 level++;
2406 *len = 0;
2407 switch (NTYPE(node)) {
2408 case NT_LIST:
2409 do {
2410 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2411 if (r == 0)
2412 *len = (int )distance_add(*len, tlen);
2413 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2414 break;
2415
2416 case NT_ALT:
2417 {
2418 int tlen2;
2419 int varlen = 0;
2420
2421 r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
2422 while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
2423 r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
2424 if (r == 0) {
2425 if (tlen != tlen2)
2426 varlen = 1;
2427 }
2428 }
2429 if (r == 0) {
2430 if (varlen != 0) {
2431 if (level == 1)
2432 r = GET_CHAR_LEN_TOP_ALT_VARLEN;
2433 else
2434 r = GET_CHAR_LEN_VARLEN;
2435 }
2436 else
2437 *len = tlen;
2438 }
2439 }
2440 break;
2441
2442 case NT_STR:
2443 {
2444 StrNode* sn = NSTR(node);
2445 UChar *s = sn->s;
2446 while (s < sn->end) {
2447 s += enclen(reg->enc, s);
2448 (*len)++;
2449 }
2450 }
2451 break;
2452
2453 case NT_QTFR:
2454 {
2455 QtfrNode* qn = NQTFR(node);
2456 if (qn->lower == qn->upper) {
2457 r = get_char_length_tree1(qn->target, reg, &tlen, level);
2458 if (r == 0)
2459 *len = (int )distance_multiply(tlen, qn->lower);
2460 }
2461 else
2462 r = GET_CHAR_LEN_VARLEN;
2463 }
2464 break;
2465
2466#ifdef USE_SUBEXP_CALL
2467 case NT_CALL:
2468 if (! IS_CALL_RECURSION(NCALL(node)))
2469 r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
2470 else
2471 r = GET_CHAR_LEN_VARLEN;
2472 break;
2473#endif
2474
2475 case NT_CTYPE:
2476 *len = 1;
2477 break;
2478
2479 case NT_CCLASS:
2480 case NT_CANY:
2481 *len = 1;
2482 break;
2483
2484 case NT_ENCLOSE:
2485 {
2486 EncloseNode* en = NENCLOSE(node);
2487 switch (en->type) {
2488 case ENCLOSE_MEMORY:
2489#ifdef USE_SUBEXP_CALL
2490 if (IS_ENCLOSE_CLEN_FIXED(en))
2491 *len = en->char_len;
2492 else {
2493 r = get_char_length_tree1(en->target, reg, len, level);
2494 if (r == 0) {
2495 en->char_len = *len;
2496 SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
2497 }
2498 }
2499 break;
2500#endif
2501 case ENCLOSE_OPTION:
2502 case ENCLOSE_STOP_BACKTRACK:
2503 case ENCLOSE_CONDITION:
2504 r = get_char_length_tree1(en->target, reg, len, level);
2505 break;
2506 default:
2507 break;
2508 }
2509 }
2510 break;
2511
2512 case NT_ANCHOR:
2513 break;
2514
2515 default:
2516 r = GET_CHAR_LEN_VARLEN;
2517 break;
2518 }
2519
2520 return r;
2521}
2522
2523static int
2524get_char_length_tree(Node* node, regex_t* reg, int* len)
2525{
2526 return get_char_length_tree1(node, reg, len, 0);
2527}
2528
2529/* x is not included y ==> 1 : 0 */
2530static int
2531is_not_included(Node* x, Node* y, regex_t* reg)
2532{
2533 int i;
2534 OnigDistance len;
2535 OnigCodePoint code;
2536 UChar *p;
2537 int ytype;
2538
2539 retry:
2540 ytype = NTYPE(y);
2541 switch (NTYPE(x)) {
2542 case NT_CTYPE:
2543 {
2544 switch (ytype) {
2545 case NT_CTYPE:
2546 if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
2547 NCTYPE(y)->not != NCTYPE(x)->not &&
2548 NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
2549 return 1;
2550 else
2551 return 0;
2552 break;
2553
2554 case NT_CCLASS:
2555 swap:
2556 {
2557 Node* tmp;
2558 tmp = x; x = y; y = tmp;
2559 goto retry;
2560 }
2561 break;
2562
2563 case NT_STR:
2564 goto swap;
2565 break;
2566
2567 default:
2568 break;
2569 }
2570 }
2571 break;
2572
2573 case NT_CCLASS:
2574 {
2575 CClassNode* xc = NCCLASS(x);
2576 switch (ytype) {
2577 case NT_CTYPE:
2578 switch (NCTYPE(y)->ctype) {
2579 case ONIGENC_CTYPE_WORD:
2580 if (NCTYPE(y)->not == 0) {
2581 if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
2582 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2583 if (BITSET_AT(xc->bs, i)) {
2584 if (NCTYPE(y)->ascii_range) {
2585 if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
2586 }
2587 else {
2588 if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
2589 }
2590 }
2591 }
2592 return 1;
2593 }
2594 return 0;
2595 }
2596 else {
2597 if (IS_NOT_NULL(xc->mbuf)) return 0;
2598 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2599 int is_word;
2600 if (NCTYPE(y)->ascii_range)
2601 is_word = IS_CODE_SB_WORD(reg->enc, i);
2602 else
2603 is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
2604 if (! is_word) {
2605 if (!IS_NCCLASS_NOT(xc)) {
2606 if (BITSET_AT(xc->bs, i))
2607 return 0;
2608 }
2609 else {
2610 if (! BITSET_AT(xc->bs, i))
2611 return 0;
2612 }
2613 }
2614 }
2615 return 1;
2616 }
2617 break;
2618
2619 default:
2620 break;
2621 }
2622 break;
2623
2624 case NT_CCLASS:
2625 {
2626 int v;
2627 CClassNode* yc = NCCLASS(y);
2628
2629 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
2630 v = BITSET_AT(xc->bs, i);
2631 if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
2632 (v == 0 && IS_NCCLASS_NOT(xc))) {
2633 v = BITSET_AT(yc->bs, i);
2634 if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
2635 (v == 0 && IS_NCCLASS_NOT(yc)))
2636 return 0;
2637 }
2638 }
2639 if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
2640 (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
2641 return 1;
2642 return 0;
2643 }
2644 break;
2645
2646 case NT_STR:
2647 goto swap;
2648 break;
2649
2650 default:
2651 break;
2652 }
2653 }
2654 break;
2655
2656 case NT_STR:
2657 {
2658 StrNode* xs = NSTR(x);
2659 if (NSTRING_LEN(x) == 0)
2660 break;
2661
2662 switch (ytype) {
2663 case NT_CTYPE:
2664 switch (NCTYPE(y)->ctype) {
2665 case ONIGENC_CTYPE_WORD:
2666 if (NCTYPE(y)->ascii_range) {
2667 if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
2668 return NCTYPE(y)->not;
2669 else
2670 return !(NCTYPE(y)->not);
2671 }
2672 else {
2673 if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
2674 return NCTYPE(y)->not;
2675 else
2676 return !(NCTYPE(y)->not);
2677 }
2678 break;
2679 default:
2680 break;
2681 }
2682 break;
2683
2684 case NT_CCLASS:
2685 {
2686 CClassNode* cc = NCCLASS(y);
2687
2688 code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
2689 xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
2690 return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
2691 }
2692 break;
2693
2694 case NT_STR:
2695 {
2696 UChar *q;
2697 StrNode* ys = NSTR(y);
2698 len = NSTRING_LEN(x);
2699 if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
2700 if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
2701 /* tiny version */
2702 return 0;
2703 }
2704 else {
2705 for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
2706 if (*p != *q) return 1;
2707 }
2708 }
2709 }
2710 break;
2711
2712 default:
2713 break;
2714 }
2715 }
2716 break;
2717
2718 default:
2719 break;
2720 }
2721
2722 return 0;
2723}
2724
2725static Node*
2726get_head_value_node(Node* node, int exact, regex_t* reg)
2727{
2728 Node* n = NULL_NODE;
2729
2730 switch (NTYPE(node)) {
2731 case NT_BREF:
2732 case NT_ALT:
2733 case NT_CANY:
2734#ifdef USE_SUBEXP_CALL
2735 case NT_CALL:
2736#endif
2737 break;
2738
2739 case NT_CTYPE:
2740 case NT_CCLASS:
2741 if (exact == 0) {
2742 n = node;
2743 }
2744 break;
2745
2746 case NT_LIST:
2747 n = get_head_value_node(NCAR(node), exact, reg);
2748 break;
2749
2750 case NT_STR:
2751 {
2752 StrNode* sn = NSTR(node);
2753
2754 if (sn->end <= sn->s)
2755 break;
2756
2757 if (exact != 0 &&
2758 !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
2759 }
2760 else {
2761 n = node;
2762 }
2763 }
2764 break;
2765
2766 case NT_QTFR:
2767 {
2768 QtfrNode* qn = NQTFR(node);
2769 if (qn->lower > 0) {
2770 if (IS_NOT_NULL(qn->head_exact))
2771 n = qn->head_exact;
2772 else
2773 n = get_head_value_node(qn->target, exact, reg);
2774 }
2775 }
2776 break;
2777
2778 case NT_ENCLOSE:
2779 {
2780 EncloseNode* en = NENCLOSE(node);
2781 switch (en->type) {
2782 case ENCLOSE_OPTION:
2783 {
2784 OnigOptionType options = reg->options;
2785
2786 reg->options = NENCLOSE(node)->option;
2787 n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
2788 reg->options = options;
2789 }
2790 break;
2791
2792 case ENCLOSE_MEMORY:
2793 case ENCLOSE_STOP_BACKTRACK:
2794 case ENCLOSE_CONDITION:
2795 n = get_head_value_node(en->target, exact, reg);
2796 break;
2797 }
2798 }
2799 break;
2800
2801 case NT_ANCHOR:
2802 if (NANCHOR(node)->type == ANCHOR_PREC_READ)
2803 n = get_head_value_node(NANCHOR(node)->target, exact, reg);
2804 break;
2805
2806 default:
2807 break;
2808 }
2809
2810 return n;
2811}
2812
2813static int
2814check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
2815{
2816 int type, r = 0;
2817
2818 type = NTYPE(node);
2819 if ((NTYPE2BIT(type) & type_mask) == 0)
2820 return 1;
2821
2822 switch (type) {
2823 case NT_LIST:
2824 case NT_ALT:
2825 do {
2826 r = check_type_tree(NCAR(node), type_mask, enclose_mask,
2827 anchor_mask);
2828 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2829 break;
2830
2831 case NT_QTFR:
2832 r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
2833 anchor_mask);
2834 break;
2835
2836 case NT_ENCLOSE:
2837 {
2838 EncloseNode* en = NENCLOSE(node);
2839 if ((en->type & enclose_mask) == 0)
2840 return 1;
2841
2842 r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
2843 }
2844 break;
2845
2846 case NT_ANCHOR:
2847 type = NANCHOR(node)->type;
2848 if ((type & anchor_mask) == 0)
2849 return 1;
2850
2851 if (NANCHOR(node)->target)
2852 r = check_type_tree(NANCHOR(node)->target,
2853 type_mask, enclose_mask, anchor_mask);
2854 break;
2855
2856 default:
2857 break;
2858 }
2859 return r;
2860}
2861
2862#ifdef USE_SUBEXP_CALL
2863
2864#define RECURSION_EXIST 1
2865#define RECURSION_INFINITE 2
2866
2867static int
2868subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
2869{
2870 int type;
2871 int r = 0;
2872
2873 type = NTYPE(node);
2874 switch (type) {
2875 case NT_LIST:
2876 {
2877 Node *x;
2878 OnigDistance min;
2879 int ret;
2880
2881 x = node;
2882 do {
2883 ret = subexp_inf_recursive_check(NCAR(x), env, head);
2884 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2885 r |= ret;
2886 if (head) {
2887 ret = get_min_match_length(NCAR(x), &min, env);
2888 if (ret != 0) return ret;
2889 if (min != 0) head = 0;
2890 }
2891 } while (IS_NOT_NULL(x = NCDR(x)));
2892 }
2893 break;
2894
2895 case NT_ALT:
2896 {
2897 int ret;
2898 r = RECURSION_EXIST;
2899 do {
2900 ret = subexp_inf_recursive_check(NCAR(node), env, head);
2901 if (ret < 0 || ret == RECURSION_INFINITE) return ret;
2902 r &= ret;
2903 } while (IS_NOT_NULL(node = NCDR(node)));
2904 }
2905 break;
2906
2907 case NT_QTFR:
2908 r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
2909 if (r == RECURSION_EXIST) {
2910 if (NQTFR(node)->lower == 0) r = 0;
2911 }
2912 break;
2913
2914 case NT_ANCHOR:
2915 {
2916 AnchorNode* an = NANCHOR(node);
2917 switch (an->type) {
2918 case ANCHOR_PREC_READ:
2919 case ANCHOR_PREC_READ_NOT:
2920 case ANCHOR_LOOK_BEHIND:
2921 case ANCHOR_LOOK_BEHIND_NOT:
2922 r = subexp_inf_recursive_check(an->target, env, head);
2923 break;
2924 }
2925 }
2926 break;
2927
2928 case NT_CALL:
2929 r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
2930 break;
2931
2932 case NT_ENCLOSE:
2933 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
2934 return 0;
2935 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
2936 return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
2937 else {
2938 SET_ENCLOSE_STATUS(node, NST_MARK2);
2939 r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
2940 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
2941 }
2942 break;
2943
2944 default:
2945 break;
2946 }
2947
2948 return r;
2949}
2950
2951static int
2952subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
2953{
2954 int type;
2955 int r = 0;
2956
2957 type = NTYPE(node);
2958 switch (type) {
2959 case NT_LIST:
2960 case NT_ALT:
2961 do {
2962 r = subexp_inf_recursive_check_trav(NCAR(node), env);
2963 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
2964 break;
2965
2966 case NT_QTFR:
2967 r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
2968 break;
2969
2970 case NT_ANCHOR:
2971 {
2972 AnchorNode* an = NANCHOR(node);
2973 switch (an->type) {
2974 case ANCHOR_PREC_READ:
2975 case ANCHOR_PREC_READ_NOT:
2976 case ANCHOR_LOOK_BEHIND:
2977 case ANCHOR_LOOK_BEHIND_NOT:
2978 r = subexp_inf_recursive_check_trav(an->target, env);
2979 break;
2980 }
2981 }
2982 break;
2983
2984 case NT_ENCLOSE:
2985 {
2986 EncloseNode* en = NENCLOSE(node);
2987
2988 if (IS_ENCLOSE_RECURSION(en)) {
2989 SET_ENCLOSE_STATUS(node, NST_MARK1);
2990 r = subexp_inf_recursive_check(en->target, env, 1);
2991 if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
2992 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
2993 }
2994 r = subexp_inf_recursive_check_trav(en->target, env);
2995 }
2996
2997 break;
2998
2999 default:
3000 break;
3001 }
3002
3003 return r;
3004}
3005
3006static int
3007subexp_recursive_check(Node* node)
3008{
3009 int r = 0;
3010
3011 switch (NTYPE(node)) {
3012 case NT_LIST:
3013 case NT_ALT:
3014 do {
3015 r |= subexp_recursive_check(NCAR(node));
3016 } while (IS_NOT_NULL(node = NCDR(node)));
3017 break;
3018
3019 case NT_QTFR:
3020 r = subexp_recursive_check(NQTFR(node)->target);
3021 break;
3022
3023 case NT_ANCHOR:
3024 {
3025 AnchorNode* an = NANCHOR(node);
3026 switch (an->type) {
3027 case ANCHOR_PREC_READ:
3028 case ANCHOR_PREC_READ_NOT:
3029 case ANCHOR_LOOK_BEHIND:
3030 case ANCHOR_LOOK_BEHIND_NOT:
3031 r = subexp_recursive_check(an->target);
3032 break;
3033 }
3034 }
3035 break;
3036
3037 case NT_CALL:
3038 r = subexp_recursive_check(NCALL(node)->target);
3039 if (r != 0) SET_CALL_RECURSION(node);
3040 break;
3041
3042 case NT_ENCLOSE:
3043 if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
3044 return 0;
3045 else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
3046 return 1; /* recursion */
3047 else {
3048 SET_ENCLOSE_STATUS(node, NST_MARK2);
3049 r = subexp_recursive_check(NENCLOSE(node)->target);
3050 CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
3051 }
3052 break;
3053
3054 default:
3055 break;
3056 }
3057
3058 return r;
3059}
3060
3061
3062static int
3063subexp_recursive_check_trav(Node* node, ScanEnv* env)
3064{
3065#define FOUND_CALLED_NODE 1
3066
3067 int type;
3068 int r = 0;
3069
3070 type = NTYPE(node);
3071 switch (type) {
3072 case NT_LIST:
3073 case NT_ALT:
3074 {
3075 int ret;
3076 do {
3077 ret = subexp_recursive_check_trav(NCAR(node), env);
3078 if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
3079 else if (ret < 0) return ret;
3080 } while (IS_NOT_NULL(node = NCDR(node)));
3081 }
3082 break;
3083
3084 case NT_QTFR:
3085 r = subexp_recursive_check_trav(NQTFR(node)->target, env);
3086 if (NQTFR(node)->upper == 0) {
3087 if (r == FOUND_CALLED_NODE)
3088 NQTFR(node)->is_refered = 1;
3089 }
3090 break;
3091
3092 case NT_ANCHOR:
3093 {
3094 AnchorNode* an = NANCHOR(node);
3095 switch (an->type) {
3096 case ANCHOR_PREC_READ:
3097 case ANCHOR_PREC_READ_NOT:
3098 case ANCHOR_LOOK_BEHIND:
3099 case ANCHOR_LOOK_BEHIND_NOT:
3100 r = subexp_recursive_check_trav(an->target, env);
3101 break;
3102 }
3103 }
3104 break;
3105
3106 case NT_ENCLOSE:
3107 {
3108 EncloseNode* en = NENCLOSE(node);
3109
3110 if (! IS_ENCLOSE_RECURSION(en)) {
3111 if (IS_ENCLOSE_CALLED(en)) {
3112 SET_ENCLOSE_STATUS(node, NST_MARK1);
3113 r = subexp_recursive_check(en->target);
3114 if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
3115 CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
3116 }
3117 }
3118 r = subexp_recursive_check_trav(en->target, env);
3119 if (IS_ENCLOSE_CALLED(en))
3120 r |= FOUND_CALLED_NODE;
3121 }
3122 break;
3123
3124 default:
3125 break;
3126 }
3127
3128 return r;
3129}
3130
3131static int
3132setup_subexp_call(Node* node, ScanEnv* env)
3133{
3134 int type;
3135 int r = 0;
3136
3137 type = NTYPE(node);
3138 switch (type) {
3139 case NT_LIST:
3140 do {
3141 r = setup_subexp_call(NCAR(node), env);
3142 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3143 break;
3144
3145 case NT_ALT:
3146 do {
3147 r = setup_subexp_call(NCAR(node), env);
3148 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3149 break;
3150
3151 case NT_QTFR:
3152 r = setup_subexp_call(NQTFR(node)->target, env);
3153 break;
3154 case NT_ENCLOSE:
3155 r = setup_subexp_call(NENCLOSE(node)->target, env);
3156 break;
3157
3158 case NT_CALL:
3159 {
3160 CallNode* cn = NCALL(node);
3161 Node** nodes = SCANENV_MEM_NODES(env);
3162
3163 if (cn->group_num != 0) {
3164 int gnum = cn->group_num;
3165
3166#ifdef USE_NAMED_GROUP
3167 if (env->num_named > 0 &&
3168 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
3169 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
3170 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
3171 }
3172#endif
3173 if (gnum > env->num_mem) {
3174 onig_scan_env_set_error_string(env,
3175 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
3176 return ONIGERR_UNDEFINED_GROUP_REFERENCE;
3177 }
3178
3179#ifdef USE_NAMED_GROUP
3180 set_call_attr:
3181#endif
3182 cn->target = nodes[cn->group_num];
3183 if (IS_NULL(cn->target)) {
3184 onig_scan_env_set_error_string(env,
3185 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3186 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3187 }
3188 SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
3189 BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
3190 cn->unset_addr_list = env->unset_addr_list;
3191 }
3192#ifdef USE_NAMED_GROUP
3193#ifdef USE_PERL_SUBEXP_CALL
3194 else if (cn->name == cn->name_end) {
3195 goto set_call_attr;
3196 }
3197#endif
3198 else {
3199 int *refs;
3200
3201 int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
3202 &refs);
3203 if (n <= 0) {
3204 onig_scan_env_set_error_string(env,
3205 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
3206 return ONIGERR_UNDEFINED_NAME_REFERENCE;
3207 }
3208 else if (n > 1 &&
3209 ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
3210 onig_scan_env_set_error_string(env,
3211 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
3212 return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
3213 }
3214 else {
3215 cn->group_num = refs[0];
3216 goto set_call_attr;
3217 }
3218 }
3219#endif
3220 }
3221 break;
3222
3223 case NT_ANCHOR:
3224 {
3225 AnchorNode* an = NANCHOR(node);
3226
3227 switch (an->type) {
3228 case ANCHOR_PREC_READ:
3229 case ANCHOR_PREC_READ_NOT:
3230 case ANCHOR_LOOK_BEHIND:
3231 case ANCHOR_LOOK_BEHIND_NOT:
3232 r = setup_subexp_call(an->target, env);
3233 break;
3234 }
3235 }
3236 break;
3237
3238 default:
3239 break;
3240 }
3241
3242 return r;
3243}
3244#endif
3245
3246/* divide different length alternatives in look-behind.
3247 (?<=A|B) ==> (?<=A)|(?<=B)
3248 (?<!A|B) ==> (?<!A)(?<!B)
3249*/
3250static int
3251divide_look_behind_alternatives(Node* node)
3252{
3253 Node *head, *np, *insert_node;
3254 AnchorNode* an = NANCHOR(node);
3255 int anc_type = an->type;
3256
3257 head = an->target;
3258 np = NCAR(head);
3259 swap_node(node, head);
3260 NCAR(node) = head;
3261 NANCHOR(head)->target = np;
3262
3263 np = node;
3264 while ((np = NCDR(np)) != NULL_NODE) {
3265 insert_node = onig_node_new_anchor(anc_type);
3266 CHECK_NULL_RETURN_MEMERR(insert_node);
3267 NANCHOR(insert_node)->target = NCAR(np);
3268 NCAR(np) = insert_node;
3269 }
3270
3271 if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
3272 np = node;
3273 do {
3274 SET_NTYPE(np, NT_LIST); /* alt -> list */
3275 } while ((np = NCDR(np)) != NULL_NODE);
3276 }
3277 return 0;
3278}
3279
3280static int
3281setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
3282{
3283 int r, len;
3284 AnchorNode* an = NANCHOR(node);
3285
3286 r = get_char_length_tree(an->target, reg, &len);
3287 if (r == 0)
3288 an->char_len = len;
3289 else if (r == GET_CHAR_LEN_VARLEN)
3290 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3291 else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
3292 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
3293 r = divide_look_behind_alternatives(node);
3294 else
3295 r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
3296 }
3297
3298 return r;
3299}
3300
3301static int
3302next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
3303{
3304 int type;
3305
3306 retry:
3307 type = NTYPE(node);
3308 if (type == NT_QTFR) {
3309 QtfrNode* qn = NQTFR(node);
3310 if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
3311#ifdef USE_QTFR_PEEK_NEXT
3312 Node* n = get_head_value_node(next_node, 1, reg);
3313 /* '\0': for UTF-16BE etc... */
3314 if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
3315 qn->next_head_exact = n;
3316 }
3317#endif
3318 /* automatic possessification a*b ==> (?>a*)b */
3319 if (qn->lower <= 1) {
3320 int ttype = NTYPE(qn->target);
3321 if (IS_NODE_TYPE_SIMPLE(ttype)) {
3322 Node *x, *y;
3323 x = get_head_value_node(qn->target, 0, reg);
3324 if (IS_NOT_NULL(x)) {
3325 y = get_head_value_node(next_node, 0, reg);
3326 if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
3327 Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
3328 CHECK_NULL_RETURN_MEMERR(en);
3329 SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
3330 swap_node(node, en);
3331 NENCLOSE(node)->target = en;
3332 }
3333 }
3334 }
3335 }
3336
3337#ifndef ONIG_DONT_OPTIMIZE
3338 if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
3339 in_root && /* qn->lower == 0 && */
3340 NTYPE(qn->target) == NT_CANY &&
3341 ! IS_MULTILINE(reg->options)) {
3342 /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
3343 Node *np;
3344 np = onig_node_new_list(NULL_NODE, NULL_NODE);
3345 CHECK_NULL_RETURN_MEMERR(np);
3346 swap_node(node, np);
3347 NCDR(node) = onig_node_new_list(np, NULL_NODE);
3348 if (IS_NULL(NCDR(node))) {
3349 onig_node_free(np);
3350 return ONIGERR_MEMORY;
3351 }
3352 np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
3353 CHECK_NULL_RETURN_MEMERR(np);
3354 NCAR(node) = np;
3355 }
3356#endif
3357 }
3358 }
3359 else if (type == NT_ENCLOSE) {
3360 EncloseNode* en = NENCLOSE(node);
3361 in_root = 0;
3362 if (en->type == ENCLOSE_MEMORY) {
3363 node = en->target;
3364 goto retry;
3365 }
3366 }
3367 return 0;
3368}
3369
3370
3371static int
3372update_string_node_case_fold(regex_t* reg, Node *node)
3373{
3374 UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3375 UChar *sbuf, *ebuf, *sp;
3376 int r, i, len;
3377 OnigDistance sbuf_size;
3378 StrNode* sn = NSTR(node);
3379
3380 end = sn->end;
3381 sbuf_size = (end - sn->s) * 2;
3382 sbuf = (UChar* )xmalloc(sbuf_size);
3383 CHECK_NULL_RETURN_MEMERR(sbuf);
3384 ebuf = sbuf + sbuf_size;
3385
3386 sp = sbuf;
3387 p = sn->s;
3388 while (p < end) {
3389 len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
3390 for (i = 0; i < len; i++) {
3391 if (sp >= ebuf) {
3392 UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
3393 if (IS_NULL(p)) {
3394 xfree(sbuf);
3395 return ONIGERR_MEMORY;
3396 }
3397 sbuf = p;
3398 sp = sbuf + sbuf_size;
3399 sbuf_size *= 2;
3400 ebuf = sbuf + sbuf_size;
3401 }
3402
3403 *sp++ = buf[i];
3404 }
3405 }
3406
3407 r = onig_node_str_set(node, sbuf, sp);
3408
3409 xfree(sbuf);
3410 return r;
3411}
3412
3413static int
3414expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
3415 regex_t* reg)
3416{
3417 int r;
3418 Node *node;
3419
3420 node = onig_node_new_str(s, end);
3421 if (IS_NULL(node)) return ONIGERR_MEMORY;
3422
3423 r = update_string_node_case_fold(reg, node);
3424 if (r != 0) {
3425 onig_node_free(node);
3426 return r;
3427 }
3428
3429 NSTRING_SET_AMBIG(node);
3430 NSTRING_SET_DONT_GET_OPT_INFO(node);
3431 *rnode = node;
3432 return 0;
3433}
3434
3435static int
3436is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
3437 int slen)
3438{
3439 int i;
3440
3441 for (i = 0; i < item_num; i++) {
3442 if (items[i].byte_len != slen) {
3443 return 1;
3444 }
3445 if (items[i].code_len != 1) {
3446 return 1;
3447 }
3448 }
3449 return 0;
3450}
3451
3452static int
3453expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
3454 UChar *p, int slen, UChar *end,
3455 regex_t* reg, Node **rnode)
3456{
3457 int r, i, j, len, varlen;
3458 Node *anode, *var_anode, *snode, *xnode, *an;
3459 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
3460
3461 *rnode = var_anode = NULL_NODE;
3462
3463 varlen = 0;
3464 for (i = 0; i < item_num; i++) {
3465 if (items[i].byte_len != slen) {
3466 varlen = 1;
3467 break;
3468 }
3469 }
3470
3471 if (varlen != 0) {
3472 *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3473 if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
3474
3475 xnode = onig_node_new_list(NULL, NULL);
3476 if (IS_NULL(xnode)) goto mem_err;
3477 NCAR(var_anode) = xnode;
3478
3479 anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3480 if (IS_NULL(anode)) goto mem_err;
3481 NCAR(xnode) = anode;
3482 }
3483 else {
3484 *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
3485 if (IS_NULL(anode)) return ONIGERR_MEMORY;
3486 }
3487
3488 snode = onig_node_new_str(p, p + slen);
3489 if (IS_NULL(snode)) goto mem_err;
3490
3491 NCAR(anode) = snode;
3492
3493 for (i = 0; i < item_num; i++) {
3494 snode = onig_node_new_str(NULL, NULL);
3495 if (IS_NULL(snode)) goto mem_err;
3496
3497 for (j = 0; j < items[i].code_len; j++) {
3498 len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
3499 if (len < 0) {
3500 r = len;
3501 goto mem_err2;
3502 }
3503
3504 r = onig_node_str_cat(snode, buf, buf + len);
3505 if (r != 0) goto mem_err2;
3506 }
3507
3508 an = onig_node_new_alt(NULL_NODE, NULL_NODE);
3509 if (IS_NULL(an)) {
3510 goto mem_err2;
3511 }
3512
3513 if (items[i].byte_len != slen) {
3514 Node *rem;
3515 UChar *q = p + items[i].byte_len;
3516
3517 if (q < end) {
3518 r = expand_case_fold_make_rem_string(&rem, q, end, reg);
3519 if (r != 0) {
3520 onig_node_free(an);
3521 goto mem_err2;
3522 }
3523
3524 xnode = onig_node_list_add(NULL_NODE, snode);
3525 if (IS_NULL(xnode)) {
3526 onig_node_free(an);
3527 onig_node_free(rem);
3528 goto mem_err2;
3529 }
3530 if (IS_NULL(onig_node_list_add(xnode, rem))) {
3531 onig_node_free(an);
3532 onig_node_free(xnode);
3533 onig_node_free(rem);
3534 goto mem_err;
3535 }
3536
3537 NCAR(an) = xnode;
3538 }
3539 else {
3540 NCAR(an) = snode;
3541 }
3542
3543 NCDR(var_anode) = an;
3544 var_anode = an;
3545 }
3546 else {
3547 NCAR(an) = snode;
3548 NCDR(anode) = an;
3549 anode = an;
3550 }
3551 }
3552
3553 return varlen;
3554
3555 mem_err2:
3556 onig_node_free(snode);
3557
3558 mem_err:
3559 onig_node_free(*rnode);
3560
3561 return ONIGERR_MEMORY;
3562}
3563
3564static int
3565expand_case_fold_string(Node* node, regex_t* reg)
3566{
3567#define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
3568
3569 int r, n, len, alt_num;
3570 int varlen = 0;
3571 UChar *start, *end, *p;
3572 Node *top_root, *root, *snode, *prev_node;
3573 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
3574 StrNode* sn = NSTR(node);
3575
3576 if (NSTRING_IS_AMBIG(node)) return 0;
3577
3578 start = sn->s;
3579 end = sn->end;
3580 if (start >= end) return 0;
3581
3582 r = 0;
3583 top_root = root = prev_node = snode = NULL_NODE;
3584 alt_num = 1;
3585 p = start;
3586 while (p < end) {
3587 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
3588 p, end, items);
3589 if (n < 0) {
3590 r = n;
3591 goto err;
3592 }
3593
3594 len = enclen(reg->enc, p);
3595
3596 varlen = is_case_fold_variable_len(n, items, len);
3597 if (n == 0 || varlen == 0) {
3598 if (IS_NULL(snode)) {
3599 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3600 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3601 if (IS_NULL(root)) {
3602 onig_node_free(prev_node);
3603 goto mem_err;
3604 }
3605 }
3606
3607 prev_node = snode = onig_node_new_str(NULL, NULL);
3608 if (IS_NULL(snode)) goto mem_err;
3609 if (IS_NOT_NULL(root)) {
3610 if (IS_NULL(onig_node_list_add(root, snode))) {
3611 onig_node_free(snode);
3612 goto mem_err;
3613 }
3614 }
3615 }
3616
3617 r = onig_node_str_cat(snode, p, p + len);
3618 if (r != 0) goto err;
3619 }
3620 else {
3621 alt_num *= (n + 1);
3622 if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
3623
3624 if (IS_NOT_NULL(snode)) {
3625 r = update_string_node_case_fold(reg, snode);
3626 if (r == 0) {
3627 NSTRING_SET_AMBIG(snode);
3628 }
3629 }
3630 if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
3631 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3632 if (IS_NULL(root)) {
3633 onig_node_free(prev_node);
3634 goto mem_err;
3635 }
3636 }
3637
3638 r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
3639 if (r < 0) goto mem_err;
3640 if (r == 1) {
3641 if (IS_NULL(root)) {
3642 top_root = prev_node;
3643 }
3644 else {
3645 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3646 onig_node_free(prev_node);
3647 goto mem_err;
3648 }
3649 }
3650
3651 root = NCAR(prev_node);
3652 }
3653 else { /* r == 0 */
3654 if (IS_NOT_NULL(root)) {
3655 if (IS_NULL(onig_node_list_add(root, prev_node))) {
3656 onig_node_free(prev_node);
3657 goto mem_err;
3658 }
3659 }
3660 }
3661
3662 snode = NULL_NODE;
3663 }
3664
3665 p += len;
3666 }
3667 if (IS_NOT_NULL(snode)) {
3668 r = update_string_node_case_fold(reg, snode);
3669 if (r == 0) {
3670 NSTRING_SET_AMBIG(snode);
3671 }
3672 }
3673
3674 if (p < end) {
3675 Node *srem;
3676
3677 r = expand_case_fold_make_rem_string(&srem, p, end, reg);
3678 if (r != 0) goto mem_err;
3679
3680 if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
3681 top_root = root = onig_node_list_add(NULL_NODE, prev_node);
3682 if (IS_NULL(root)) {
3683 onig_node_free(srem);
3684 onig_node_free(prev_node);
3685 goto mem_err;
3686 }
3687 }
3688
3689 if (IS_NULL(root)) {
3690 prev_node = srem;
3691 }
3692 else {
3693 if (IS_NULL(onig_node_list_add(root, srem))) {
3694 onig_node_free(srem);
3695 goto mem_err;
3696 }
3697 }
3698 }
3699
3700 /* ending */
3701 top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
3702 swap_node(node, top_root);
3703 onig_node_free(top_root);
3704 return 0;
3705
3706 mem_err:
3707 r = ONIGERR_MEMORY;
3708
3709 err:
3710 onig_node_free(top_root);
3711 return r;
3712}
3713
3714
3715#ifdef USE_COMBINATION_EXPLOSION_CHECK
3716
3717#define CEC_THRES_NUM_BIG_REPEAT 512
3718#define CEC_INFINITE_NUM 0x7fffffff
3719
3720#define CEC_IN_INFINITE_REPEAT (1<<0)
3721#define CEC_IN_FINITE_REPEAT (1<<1)
3722#define CEC_CONT_BIG_REPEAT (1<<2)
3723
3724static int
3725setup_comb_exp_check(Node* node, int state, ScanEnv* env)
3726{
3727 int type;
3728 int r = state;
3729
3730 type = NTYPE(node);
3731 switch (type) {
3732 case NT_LIST:
3733 {
3734 Node* prev = NULL_NODE;
3735 do {
3736 r = setup_comb_exp_check(NCAR(node), r, env);
3737 prev = NCAR(node);
3738 } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
3739 }
3740 break;
3741
3742 case NT_ALT:
3743 {
3744 int ret;
3745 do {
3746 ret = setup_comb_exp_check(NCAR(node), state, env);
3747 r |= ret;
3748 } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
3749 }
3750 break;
3751
3752 case NT_QTFR:
3753 {
3754 int child_state = state;
3755 int add_state = 0;
3756 QtfrNode* qn = NQTFR(node);
3757 Node* target = qn->target;
3758 int var_num;
3759
3760 if (! IS_REPEAT_INFINITE(qn->upper)) {
3761 if (qn->upper > 1) {
3762 /* {0,1}, {1,1} are allowed */
3763 child_state |= CEC_IN_FINITE_REPEAT;
3764
3765 /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
3766 if (env->backrefed_mem == 0) {
3767 if (NTYPE(qn->target) == NT_ENCLOSE) {
3768 EncloseNode* en = NENCLOSE(qn->target);
3769 if (en->type == ENCLOSE_MEMORY) {
3770 if (NTYPE(en->target) == NT_QTFR) {
3771 QtfrNode* q = NQTFR(en->target);
3772 if (IS_REPEAT_INFINITE(q->upper)
3773 && q->greedy == qn->greedy) {
3774 qn->upper = (qn->lower == 0 ? 1 : qn->lower);
3775 if (qn->upper == 1)
3776 child_state = state;
3777 }
3778 }
3779 }
3780 }
3781 }
3782 }
3783 }
3784
3785 if (state & CEC_IN_FINITE_REPEAT) {
3786 qn->comb_exp_check_num = -1;
3787 }
3788 else {
3789 if (IS_REPEAT_INFINITE(qn->upper)) {
3790 var_num = CEC_INFINITE_NUM;
3791 child_state |= CEC_IN_INFINITE_REPEAT;
3792 }
3793 else {
3794 var_num = qn->upper - qn->lower;
3795 }
3796
3797 if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
3798 add_state |= CEC_CONT_BIG_REPEAT;
3799
3800 if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
3801 ((state & CEC_CONT_BIG_REPEAT) != 0 &&
3802 var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
3803 if (qn->comb_exp_check_num == 0) {
3804 env->num_comb_exp_check++;
3805 qn->comb_exp_check_num = env->num_comb_exp_check;
3806 if (env->curr_max_regnum > env->comb_exp_max_regnum)
3807 env->comb_exp_max_regnum = env->curr_max_regnum;
3808 }
3809 }
3810 }
3811
3812 r = setup_comb_exp_check(target, child_state, env);
3813 r |= add_state;
3814 }
3815 break;
3816
3817 case NT_ENCLOSE:
3818 {
3819 EncloseNode* en = NENCLOSE(node);
3820
3821 switch (en->type) {
3822 case ENCLOSE_MEMORY:
3823 {
3824 if (env->curr_max_regnum < en->regnum)
3825 env->curr_max_regnum = en->regnum;
3826
3827 r = setup_comb_exp_check(en->target, state, env);
3828 }
3829 break;
3830
3831 default:
3832 r = setup_comb_exp_check(en->target, state, env);
3833 break;
3834 }
3835 }
3836 break;
3837
3838#ifdef USE_SUBEXP_CALL
3839 case NT_CALL:
3840 if (IS_CALL_RECURSION(NCALL(node)))
3841 env->has_recursion = 1;
3842 else
3843 r = setup_comb_exp_check(NCALL(node)->target, state, env);
3844 break;
3845#endif
3846
3847 default:
3848 break;
3849 }
3850
3851 return r;
3852}
3853#endif
3854
3855#define IN_ALT (1<<0)
3856#define IN_NOT (1<<1)
3857#define IN_REPEAT (1<<2)
3858#define IN_VAR_REPEAT (1<<3)
3859#define IN_ROOT (1<<4)
3860
3861/* setup_tree does the following work.
3862 1. check empty loop. (set qn->target_empty_info)
3863 2. expand ignore-case in char class.
3864 3. set memory status bit flags. (reg->mem_stats)
3865 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
3866 5. find invalid patterns in look-behind.
3867 6. expand repeated string.
3868 */
3869static int
3870setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
3871{
3872 int type;
3873 int r = 0;
3874 int in_root = state & IN_ROOT;
3875
3876 state &= ~IN_ROOT;
3877restart:
3878 type = NTYPE(node);
3879 switch (type) {
3880 case NT_LIST:
3881 {
3882 Node* prev = NULL_NODE;
3883 int prev_in_root = 0;
3884 state |= in_root;
3885 do {
3886 r = setup_tree(NCAR(node), reg, state, env);
3887 if (IS_NOT_NULL(prev) && r == 0) {
3888 r = next_setup(prev, NCAR(node), prev_in_root, reg);
3889 }
3890 prev = NCAR(node);
3891 prev_in_root = state & IN_ROOT;
3892 state &= ~IN_ROOT;
3893 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3894 }
3895 break;
3896
3897 case NT_ALT:
3898 do {
3899 r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
3900 } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
3901 break;
3902
3903 case NT_CCLASS:
3904 break;
3905
3906 case NT_STR:
3907 if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
3908 r = expand_case_fold_string(node, reg);
3909 }
3910 break;
3911
3912 case NT_CTYPE:
3913 case NT_CANY:
3914 break;
3915
3916#ifdef USE_SUBEXP_CALL
3917 case NT_CALL:
3918 break;
3919#endif
3920
3921 case NT_BREF:
3922 {
3923 int i;
3924 int* p;
3925 Node** nodes = SCANENV_MEM_NODES(env);
3926 BRefNode* br = NBREF(node);
3927 p = BACKREFS_P(br);
3928 for (i = 0; i < br->back_num; i++) {
3929 if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
3930 BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
3931 BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
3932#ifdef USE_BACKREF_WITH_LEVEL
3933 if (IS_BACKREF_NEST_LEVEL(br)) {
3934 BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
3935 }
3936#endif
3937 SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
3938 }
3939 }
3940 break;
3941
3942 case NT_QTFR:
3943 {
3944 OnigDistance d;
3945 QtfrNode* qn = NQTFR(node);
3946 Node* target = qn->target;
3947
3948 if ((state & IN_REPEAT) != 0) {
3949 qn->state |= NST_IN_REPEAT;
3950 }
3951
3952 if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
3953 r = get_min_match_length(target, &d, env);
3954 if (r) break;
3955 if (d == 0) {
3956 qn->target_empty_info = NQ_TARGET_IS_EMPTY;
3957#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
3958 r = quantifiers_memory_node_info(target);
3959 if (r < 0) break;
3960 if (r > 0) {
3961 qn->target_empty_info = r;
3962 }
3963#endif
3964#if 0
3965 r = get_max_match_length(target, &d, env);
3966 if (r == 0 && d == 0) {
3967 /* ()* ==> ()?, ()+ ==> () */
3968 qn->upper = 1;
3969 if (qn->lower > 1) qn->lower = 1;
3970 if (NTYPE(target) == NT_STR) {
3971 qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
3972 }
3973 }
3974#endif
3975 }
3976 }
3977
3978 state |= IN_REPEAT;
3979 if (qn->lower != qn->upper)
3980 state |= IN_VAR_REPEAT;
3981 r = setup_tree(target, reg, state, env);
3982 if (r) break;
3983
3984 /* expand string */
3985#define EXPAND_STRING_MAX_LENGTH 100
3986 if (NTYPE(target) == NT_STR) {
3987 if (qn->lower > 1) {
3988 int i, n = qn->lower;
3989 OnigDistance len = NSTRING_LEN(target);
3990 StrNode* sn = NSTR(target);
3991 Node* np;
3992
3993 np = onig_node_new_str(sn->s, sn->end);
3994 if (IS_NULL(np)) return ONIGERR_MEMORY;
3995 NSTR(np)->flag = sn->flag;
3996
3997 for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
3998 r = onig_node_str_cat(np, sn->s, sn->end);
3999 if (r) {
4000 onig_node_free(np);
4001 return r;
4002 }
4003 }
4004 if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
4005 Node *np1, *np2;
4006
4007 qn->lower -= i;
4008 if (! IS_REPEAT_INFINITE(qn->upper))
4009 qn->upper -= i;
4010
4011 np1 = onig_node_new_list(np, NULL);
4012 if (IS_NULL(np1)) {
4013 onig_node_free(np);
4014 return ONIGERR_MEMORY;
4015 }
4016 swap_node(np1, node);
4017 np2 = onig_node_list_add(node, np1);
4018 if (IS_NULL(np2)) {
4019 onig_node_free(np1);
4020 return ONIGERR_MEMORY;
4021 }
4022 }
4023 else {
4024 swap_node(np, node);
4025 onig_node_free(np);
4026 }
4027 break; /* break case NT_QTFR: */
4028 }
4029 }
4030
4031#ifdef USE_OP_PUSH_OR_JUMP_EXACT
4032 if (qn->greedy && (qn->target_empty_info != 0)) {
4033 if (NTYPE(target) == NT_QTFR) {
4034 QtfrNode* tqn = NQTFR(target);
4035 if (IS_NOT_NULL(tqn->head_exact)) {
4036 qn->head_exact = tqn->head_exact;
4037 tqn->head_exact = NULL;
4038 }
4039 }
4040 else {
4041 qn->head_exact = get_head_value_node(qn->target, 1, reg);
4042 }
4043 }
4044#endif
4045 }
4046 break;
4047
4048 case NT_ENCLOSE:
4049 {
4050 EncloseNode* en = NENCLOSE(node);
4051
4052 switch (en->type) {
4053 case ENCLOSE_OPTION:
4054 {
4055 OnigOptionType options = reg->options;
4056 state |= in_root;
4057 reg->options = NENCLOSE(node)->option;
4058 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4059 reg->options = options;
4060 }
4061 break;
4062
4063 case ENCLOSE_MEMORY:
4064 if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
4065 BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
4066 /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
4067 }
4068 r = setup_tree(en->target, reg, state, env);
4069 break;
4070
4071 case ENCLOSE_STOP_BACKTRACK:
4072 {
4073 Node* target = en->target;
4074 r = setup_tree(target, reg, state, env);
4075 if (NTYPE(target) == NT_QTFR) {
4076 QtfrNode* tqn = NQTFR(target);
4077 if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
4078 tqn->greedy != 0) { /* (?>a*), a*+ etc... */
4079 int qtype = NTYPE(tqn->target);
4080 if (IS_NODE_TYPE_SIMPLE(qtype))
4081 SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
4082 }
4083 }
4084 }
4085 break;
4086
4087 case ENCLOSE_CONDITION:
4088#ifdef USE_NAMED_GROUP
4089 if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
4090 env->num_named > 0 &&
4091 IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
4092 !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
4093 return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
4094 }
4095#endif
4096 r = setup_tree(NENCLOSE(node)->target, reg, state, env);
4097 break;
4098 }
4099 }
4100 break;
4101
4102 case NT_ANCHOR:
4103 {
4104 AnchorNode* an = NANCHOR(node);
4105
4106 switch (an->type) {
4107 case ANCHOR_PREC_READ:
4108 r = setup_tree(an->target, reg, state, env);
4109 break;
4110 case ANCHOR_PREC_READ_NOT:
4111 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4112 break;
4113
4114/* allowed node types in look-behind */
4115#define ALLOWED_TYPE_IN_LB \
4116 ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
4117 BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
4118
4119#define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
4120#define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
4121
4122#define ALLOWED_ANCHOR_IN_LB \
4123( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4124 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4125 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4126 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4127#define ALLOWED_ANCHOR_IN_LB_NOT \
4128( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
4129 ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
4130 ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
4131 ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
4132
4133 case ANCHOR_LOOK_BEHIND:
4134 {
4135 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4136 ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
4137 if (r < 0) return r;
4138 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4139 r = setup_look_behind(node, reg, env);
4140 if (r != 0) return r;
4141 if (NTYPE(node) != NT_ANCHOR) goto restart;
4142 r = setup_tree(an->target, reg, state, env);
4143 }
4144 break;
4145
4146 case ANCHOR_LOOK_BEHIND_NOT:
4147 {
4148 r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
4149 ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
4150 if (r < 0) return r;
4151 if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
4152 r = setup_look_behind(node, reg, env);
4153 if (r != 0) return r;
4154 if (NTYPE(node) != NT_ANCHOR) goto restart;
4155 r = setup_tree(an->target, reg, (state | IN_NOT), env);
4156 }
4157 break;
4158 }
4159 }
4160 break;
4161
4162 default:
4163 break;
4164 }
4165
4166 return r;
4167}
4168
4169#ifndef USE_SUNDAY_QUICK_SEARCH
4170/* set skip map for Boyer-Moore search */
4171static int
4172set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4173 UChar skip[], int** int_skip, int ignore_case)
4174{
4175 OnigDistance i, len;
4176 int clen, flen, n, j, k;
4177 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4178 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4179 OnigEncoding enc = reg->enc;
4180
4181 len = end - s;
4182 if (len < ONIG_CHAR_TABLE_SIZE) {
4183 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
4184
4185 n = 0;
4186 for (i = 0; i < len - 1; i += clen) {
4187 p = s + i;
4188 if (ignore_case)
4189 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4190 p, end, items);
4191 clen = enclen(enc, p);
4192
4193 for (j = 0; j < n; j++) {
4194 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4195 return 1; /* different length isn't supported. */
4196 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4197 if (flen != clen)
4198 return 1; /* different length isn't supported. */
4199 }
4200 for (j = 0; j < clen; j++) {
4201 skip[s[i + j]] = (UChar )(len - 1 - i - j);
4202 for (k = 0; k < n; k++) {
4203 skip[buf[k][j]] = (UChar )(len - 1 - i - j);
4204 }
4205 }
4206 }
4207 }
4208 else {
4209 if (IS_NULL(*int_skip)) {
4210 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4211 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4212 }
4213 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
4214
4215 n = 0;
4216 for (i = 0; i < len - 1; i += clen) {
4217 p = s + i;
4218 if (ignore_case)
4219 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4220 p, end, items);
4221 clen = enclen(enc, p);
4222
4223 for (j = 0; j < n; j++) {
4224 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4225 return 1; /* different length isn't supported. */
4226 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4227 if (flen != clen)
4228 return 1; /* different length isn't supported. */
4229 }
4230 for (j = 0; j < clen; j++) {
4231 (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
4232 for (k = 0; k < n; k++) {
4233 (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
4234 }
4235 }
4236 }
4237 }
4238 return 0;
4239}
4240
4241#else /* USE_SUNDAY_QUICK_SEARCH */
4242
4243/* set skip map for Sunday's quick search */
4244static int
4245set_bm_skip(UChar* s, UChar* end, regex_t* reg,
4246 UChar skip[], int** int_skip, int ignore_case)
4247{
4248 OnigDistance i, len;
4249 int clen, flen, n, j, k;
4250 UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
4251 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4252 OnigEncoding enc = reg->enc;
4253
4254 len = end - s;
4255 if (len < ONIG_CHAR_TABLE_SIZE) {
4256 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
4257
4258 n = 0;
4259 for (i = 0; i < len; i += clen) {
4260 p = s + i;
4261 if (ignore_case)
4262 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4263 p, end, items);
4264 clen = enclen(enc, p);
4265
4266 for (j = 0; j < n; j++) {
4267 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4268 return 1; /* different length isn't supported. */
4269 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4270 if (flen != clen)
4271 return 1; /* different length isn't supported. */
4272 }
4273 for (j = 0; j < clen; j++) {
4274 skip[s[i + j]] = (UChar )(len - i - j);
4275 for (k = 0; k < n; k++) {
4276 skip[buf[k][j]] = (UChar )(len - i - j);
4277 }
4278 }
4279 }
4280 }
4281 else {
4282 if (IS_NULL(*int_skip)) {
4283 *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
4284 if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
4285 }
4286 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
4287
4288 n = 0;
4289 for (i = 0; i < len; i += clen) {
4290 p = s + i;
4291 if (ignore_case)
4292 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
4293 p, end, items);
4294 clen = enclen(enc, p);
4295
4296 for (j = 0; j < n; j++) {
4297 if ((items[j].code_len != 1) || (items[j].byte_len != clen))
4298 return 1; /* different length isn't supported. */
4299 flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
4300 if (flen != clen)
4301 return 1; /* different length isn't supported. */
4302 }
4303 for (j = 0; j < clen; j++) {
4304 (*int_skip)[s[i + j]] = (int )(len - i - j);
4305 for (k = 0; k < n; k++) {
4306 (*int_skip)[buf[k][j]] = (int )(len - i - j);
4307 }
4308 }
4309 }
4310 }
4311 return 0;
4312}
4313#endif /* USE_SUNDAY_QUICK_SEARCH */
4314
4315#define OPT_EXACT_MAXLEN 24
4316
4317typedef struct {
4318 OnigDistance min; /* min byte length */
4319 OnigDistance max; /* max byte length */
4320} MinMaxLen;
4321
4322typedef struct {
4323 MinMaxLen mmd;
4324 OnigEncoding enc;
4325 OnigOptionType options;
4326 OnigCaseFoldType case_fold_flag;
4327 ScanEnv* scan_env;
4328} OptEnv;
4329
4330typedef struct {
4331 int left_anchor;
4332 int right_anchor;
4333} OptAncInfo;
4334
4335typedef struct {
4336 MinMaxLen mmd; /* info position */
4337 OptAncInfo anc;
4338
4339 int reach_end;
4340 int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
4341 int len;
4342 UChar s[OPT_EXACT_MAXLEN];
4343} OptExactInfo;
4344
4345typedef struct {
4346 MinMaxLen mmd; /* info position */
4347 OptAncInfo anc;
4348
4349 int value; /* weighted value */
4350 UChar map[ONIG_CHAR_TABLE_SIZE];
4351} OptMapInfo;
4352
4353typedef struct {
4354 MinMaxLen len;
4355
4356 OptAncInfo anc;
4357 OptExactInfo exb; /* boundary */
4358 OptExactInfo exm; /* middle */
4359 OptExactInfo expr; /* prec read (?=...) */
4360
4361 OptMapInfo map; /* boundary */
4362} NodeOptInfo;
4363
4364
4365static int
4366map_position_value(OnigEncoding enc, int i)
4367{
4368 static const short int ByteValTable[] = {
4369 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
4370 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4371 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
4372 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
4373 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4374 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
4375 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
4376 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
4377 };
4378
4379 if (i < numberof(ByteValTable)) {
4380 if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
4381 return 20;
4382 else
4383 return (int )ByteValTable[i];
4384 }
4385 else
4386 return 4; /* Take it easy. */
4387}
4388
4389static int
4390distance_value(MinMaxLen* mm)
4391{
4392 /* 1000 / (min-max-dist + 1) */
4393 static const short int dist_vals[] = {
4394 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
4395 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
4396 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
4397 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
4398 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
4399 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
4400 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
4401 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
4402 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
4403 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
4404 };
4405
4406 OnigDistance d;
4407
4408 if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
4409
4410 d = mm->max - mm->min;
4411 if (d < numberof(dist_vals))
4412 /* return dist_vals[d] * 16 / (mm->min + 12); */
4413 return (int )dist_vals[d];
4414 else
4415 return 1;
4416}
4417
4418static int
4419comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
4420{
4421 if (v2 <= 0) return -1;
4422 if (v1 <= 0) return 1;
4423
4424 v1 *= distance_value(d1);
4425 v2 *= distance_value(d2);
4426
4427 if (v2 > v1) return 1;
4428 if (v2 < v1) return -1;
4429
4430 if (d2->min < d1->min) return 1;
4431 if (d2->min > d1->min) return -1;
4432 return 0;
4433}
4434
4435static int
4436is_equal_mml(MinMaxLen* a, MinMaxLen* b)
4437{
4438 return (a->min == b->min && a->max == b->max) ? 1 : 0;
4439}
4440
4441
4442static void
4443set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
4444{
4445 mml->min = min;
4446 mml->max = max;
4447}
4448
4449static void
4450clear_mml(MinMaxLen* mml)
4451{
4452 mml->min = mml->max = 0;
4453}
4454
4455static void
4456copy_mml(MinMaxLen* to, MinMaxLen* from)
4457{
4458 to->min = from->min;
4459 to->max = from->max;
4460}
4461
4462static void
4463add_mml(MinMaxLen* to, MinMaxLen* from)
4464{
4465 to->min = distance_add(to->min, from->min);
4466 to->max = distance_add(to->max, from->max);
4467}
4468
4469#if 0
4470static void
4471add_len_mml(MinMaxLen* to, OnigDistance len)
4472{
4473 to->min = distance_add(to->min, len);
4474 to->max = distance_add(to->max, len);
4475}
4476#endif
4477
4478static void
4479alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
4480{
4481 if (to->min > from->min) to->min = from->min;
4482 if (to->max < from->max) to->max = from->max;
4483}
4484
4485static void
4486copy_opt_env(OptEnv* to, OptEnv* from)
4487{
4488 *to = *from;
4489}
4490
4491static void
4492clear_opt_anc_info(OptAncInfo* anc)
4493{
4494 anc->left_anchor = 0;
4495 anc->right_anchor = 0;
4496}
4497
4498static void
4499copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
4500{
4501 *to = *from;
4502}
4503
4504static void
4505concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
4506 OnigDistance left_len, OnigDistance right_len)
4507{
4508 clear_opt_anc_info(to);
4509
4510 to->left_anchor = left->left_anchor;
4511 if (left_len == 0) {
4512 to->left_anchor |= right->left_anchor;
4513 }
4514
4515 to->right_anchor = right->right_anchor;
4516 if (right_len == 0) {
4517 to->right_anchor |= left->right_anchor;
4518 }
4519 else {
4520 to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
4521 }
4522}
4523
4524static int
4525is_left_anchor(int anc)
4526{
4527 if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
4528 anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
4529 anc == ANCHOR_PREC_READ_NOT)
4530 return 0;
4531
4532 return 1;
4533}
4534
4535static int
4536is_set_opt_anc_info(OptAncInfo* to, int anc)
4537{
4538 if ((to->left_anchor & anc) != 0) return 1;
4539
4540 return ((to->right_anchor & anc) != 0 ? 1 : 0);
4541}
4542
4543static void
4544add_opt_anc_info(OptAncInfo* to, int anc)
4545{
4546 if (is_left_anchor(anc))
4547 to->left_anchor |= anc;
4548 else
4549 to->right_anchor |= anc;
4550}
4551
4552static void
4553remove_opt_anc_info(OptAncInfo* to, int anc)
4554{
4555 if (is_left_anchor(anc))
4556 to->left_anchor &= ~anc;
4557 else
4558 to->right_anchor &= ~anc;
4559}
4560
4561static void
4562alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
4563{
4564 to->left_anchor &= add->left_anchor;
4565 to->right_anchor &= add->right_anchor;
4566}
4567
4568static int
4569is_full_opt_exact_info(OptExactInfo* ex)
4570{
4571 return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
4572}
4573
4574static void
4575clear_opt_exact_info(OptExactInfo* ex)
4576{
4577 clear_mml(&ex->mmd);
4578 clear_opt_anc_info(&ex->anc);
4579 ex->reach_end = 0;
4580 ex->ignore_case = -1; /* unset */
4581 ex->len = 0;
4582 ex->s[0] = '\0';
4583}
4584
4585static void
4586copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
4587{
4588 *to = *from;
4589}
4590
4591static void
4592concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
4593{
4594 int i, j, len;
4595 UChar *p, *end;
4596 OptAncInfo tanc;
4597
4598 if (to->ignore_case < 0)
4599 to->ignore_case = add->ignore_case;
4600 else if (to->ignore_case != add->ignore_case)
4601 return ; /* avoid */
4602
4603 p = add->s;
4604 end = p + add->len;
4605 for (i = to->len; p < end; ) {
4606 len = enclen(enc, p);
4607 if (i + len > OPT_EXACT_MAXLEN) break;
4608 for (j = 0; j < len && p < end; j++)
4609 to->s[i++] = *p++;
4610 }
4611
4612 to->len = i;
4613 to->reach_end = (p == end ? add->reach_end : 0);
4614
4615 concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
4616 if (! to->reach_end) tanc.right_anchor = 0;
4617 copy_opt_anc_info(&to->anc, &tanc);
4618}
4619
4620static void
4621concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
4622 int raw ARG_UNUSED, OnigEncoding enc)
4623{
4624 int i, j, len;
4625 UChar *p;
4626
4627 for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
4628 len = enclen(enc, p);
4629 if (i + len > OPT_EXACT_MAXLEN) break;
4630 for (j = 0; j < len && p < end; j++)
4631 to->s[i++] = *p++;
4632 }
4633
4634 to->len = i;
4635}
4636
4637static void
4638alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
4639{
4640 int i, j, len;
4641
4642 if (add->len == 0 || to->len == 0) {
4643 clear_opt_exact_info(to);
4644 return ;
4645 }
4646
4647 if (! is_equal_mml(&to->mmd, &add->mmd)) {
4648 clear_opt_exact_info(to);
4649 return ;
4650 }
4651
4652 for (i = 0; i < to->len && i < add->len; ) {
4653 if (to->s[i] != add->s[i]) break;
4654 len = enclen(env->enc, to->s + i);
4655
4656 for (j = 1; j < len; j++) {
4657 if (to->s[i+j] != add->s[i+j]) break;
4658 }
4659 if (j < len) break;
4660 i += len;
4661 }
4662
4663 if (! add->reach_end || i < add->len || i < to->len) {
4664 to->reach_end = 0;
4665 }
4666 to->len = i;
4667 if (to->ignore_case < 0)
4668 to->ignore_case = add->ignore_case;
4669 else if (add->ignore_case >= 0)
4670 to->ignore_case |= add->ignore_case;
4671
4672 alt_merge_opt_anc_info(&to->anc, &add->anc);
4673 if (! to->reach_end) to->anc.right_anchor = 0;
4674}
4675
4676static void
4677select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
4678{
4679 int v1, v2;
4680
4681 v1 = now->len;
4682 v2 = alt->len;
4683
4684 if (v2 == 0) {
4685 return ;
4686 }
4687 else if (v1 == 0) {
4688 copy_opt_exact_info(now, alt);
4689 return ;
4690 }
4691 else if (v1 <= 2 && v2 <= 2) {
4692 /* ByteValTable[x] is big value --> low price */
4693 v2 = map_position_value(enc, now->s[0]);
4694 v1 = map_position_value(enc, alt->s[0]);
4695
4696 if (now->len > 1) v1 += 5;
4697 if (alt->len > 1) v2 += 5;
4698 }
4699
4700 if (now->ignore_case <= 0) v1 *= 2;
4701 if (alt->ignore_case <= 0) v2 *= 2;
4702
4703 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4704 copy_opt_exact_info(now, alt);
4705}
4706
4707static void
4708clear_opt_map_info(OptMapInfo* map)
4709{
4710 static const OptMapInfo clean_info = {
4711 {0, 0}, {0, 0}, 0,
4712 {
4713 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4714 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4715 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4716 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4717 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4718 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4719 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4720 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4721 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4722 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4723 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4724 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4725 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4726 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4727 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4728 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4729 }
4730 };
4731
4732 xmemcpy(map, &clean_info, sizeof(OptMapInfo));
4733}
4734
4735static void
4736copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
4737{
4738 *to = *from;
4739}
4740
4741static void
4742add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
4743{
4744 if (map->map[c] == 0) {
4745 map->map[c] = 1;
4746 map->value += map_position_value(enc, c);
4747 }
4748}
4749
4750static int
4751add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
4752 OnigEncoding enc, OnigCaseFoldType case_fold_flag)
4753{
4754 OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
4755 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
4756 int i, n;
4757
4758 add_char_opt_map_info(map, p[0], enc);
4759
4760 case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
4761 n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
4762 if (n < 0) return n;
4763
4764 for (i = 0; i < n; i++) {
4765 ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
4766 add_char_opt_map_info(map, buf[0], enc);
4767 }
4768
4769 return 0;
4770}
4771
4772static void
4773select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
4774{
4775 const int z = 1<<15; /* 32768: something big value */
4776
4777 int v1, v2;
4778
4779 if (alt->value == 0) return ;
4780 if (now->value == 0) {
4781 copy_opt_map_info(now, alt);
4782 return ;
4783 }
4784
4785 v1 = z / now->value;
4786 v2 = z / alt->value;
4787 if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
4788 copy_opt_map_info(now, alt);
4789}
4790
4791static int
4792comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
4793{
4794#define COMP_EM_BASE 20
4795 int ve, vm;
4796
4797 if (m->value <= 0) return -1;
4798
4799 ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
4800 vm = COMP_EM_BASE * 5 * 2 / m->value;
4801 return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
4802}
4803
4804static void
4805alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
4806{
4807 int i, val;
4808
4809 /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
4810 if (to->value == 0) return ;
4811 if (add->value == 0 || to->mmd.max < add->mmd.min) {
4812 clear_opt_map_info(to);
4813 return ;
4814 }
4815
4816 alt_merge_mml(&to->mmd, &add->mmd);
4817
4818 val = 0;
4819 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
4820 if (add->map[i])
4821 to->map[i] = 1;
4822
4823 if (to->map[i])
4824 val += map_position_value(enc, i);
4825 }
4826 to->value = val;
4827
4828 alt_merge_opt_anc_info(&to->anc, &add->anc);
4829}
4830
4831static void
4832set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
4833{
4834 copy_mml(&(opt->exb.mmd), mmd);
4835 copy_mml(&(opt->expr.mmd), mmd);
4836 copy_mml(&(opt->map.mmd), mmd);
4837}
4838
4839static void
4840clear_node_opt_info(NodeOptInfo* opt)
4841{
4842 clear_mml(&opt->len);
4843 clear_opt_anc_info(&opt->anc);
4844 clear_opt_exact_info(&opt->exb);
4845 clear_opt_exact_info(&opt->exm);
4846 clear_opt_exact_info(&opt->expr);
4847 clear_opt_map_info(&opt->map);
4848}
4849
4850static void
4851copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
4852{
4853 *to = *from;
4854}
4855
4856static void
4857concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
4858{
4859 int exb_reach, exm_reach;
4860 OptAncInfo tanc;
4861
4862 concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
4863 copy_opt_anc_info(&to->anc, &tanc);
4864
4865 if (add->exb.len > 0 && to->len.max == 0) {
4866 concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
4867 to->len.max, add->len.max);
4868 copy_opt_anc_info(&add->exb.anc, &tanc);
4869 }
4870
4871 if (add->map.value > 0 && to->len.max == 0) {
4872 if (add->map.mmd.max == 0)
4873 add->map.anc.left_anchor |= to->anc.left_anchor;
4874 }
4875
4876 exb_reach = to->exb.reach_end;
4877 exm_reach = to->exm.reach_end;
4878
4879 if (add->len.max != 0)
4880 to->exb.reach_end = to->exm.reach_end = 0;
4881
4882 if (add->exb.len > 0) {
4883 if (exb_reach) {
4884 concat_opt_exact_info(&to->exb, &add->exb, enc);
4885 clear_opt_exact_info(&add->exb);
4886 }
4887 else if (exm_reach) {
4888 concat_opt_exact_info(&to->exm, &add->exb, enc);
4889 clear_opt_exact_info(&add->exb);
4890 }
4891 }
4892 select_opt_exact_info(enc, &to->exm, &add->exb);
4893 select_opt_exact_info(enc, &to->exm, &add->exm);
4894
4895 if (to->expr.len > 0) {
4896 if (add->len.max > 0) {
4897 if (to->expr.len > (int )add->len.max)
4898 to->expr.len = (int )add->len.max;
4899
4900 if (to->expr.mmd.max == 0)
4901 select_opt_exact_info(enc, &to->exb, &to->expr);
4902 else
4903 select_opt_exact_info(enc, &to->exm, &to->expr);
4904 }
4905 }
4906 else if (add->expr.len > 0) {
4907 copy_opt_exact_info(&to->expr, &add->expr);
4908 }
4909
4910 select_opt_map_info(&to->map, &add->map);
4911
4912 add_mml(&to->len, &add->len);
4913}
4914
4915static void
4916alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
4917{
4918 alt_merge_opt_anc_info (&to->anc, &add->anc);
4919 alt_merge_opt_exact_info(&to->exb, &add->exb, env);
4920 alt_merge_opt_exact_info(&to->exm, &add->exm, env);
4921 alt_merge_opt_exact_info(&to->expr, &add->expr, env);
4922 alt_merge_opt_map_info(env->enc, &to->map, &add->map);
4923
4924 alt_merge_mml(&to->len, &add->len);
4925}
4926
4927
4928#define MAX_NODE_OPT_INFO_REF_COUNT 5
4929
4930static int
4931optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
4932{
4933 int type;
4934 int r = 0;
4935
4936 clear_node_opt_info(opt);
4937 set_bound_node_opt_info(opt, &env->mmd);
4938
4939 type = NTYPE(node);
4940 switch (type) {
4941 case NT_LIST:
4942 {
4943 OptEnv nenv;
4944 NodeOptInfo nopt;
4945 Node* nd = node;
4946
4947 copy_opt_env(&nenv, env);
4948 do {
4949 r = optimize_node_left(NCAR(nd), &nopt, &nenv);
4950 if (r == 0) {
4951 add_mml(&nenv.mmd, &nopt.len);
4952 concat_left_node_opt_info(env->enc, opt, &nopt);
4953 }
4954 } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
4955 }
4956 break;
4957
4958 case NT_ALT:
4959 {
4960 NodeOptInfo nopt;
4961 Node* nd = node;
4962
4963 do {
4964 r = optimize_node_left(NCAR(nd), &nopt, env);
4965 if (r == 0) {
4966 if (nd == node) copy_node_opt_info(opt, &nopt);
4967 else alt_merge_node_opt_info(opt, &nopt, env);
4968 }
4969 } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
4970 }
4971 break;
4972
4973 case NT_STR:
4974 {
4975 StrNode* sn = NSTR(node);
4976 OnigDistance slen = sn->end - sn->s;
4977 int is_raw = NSTRING_IS_RAW(node);
4978
4979 if (! NSTRING_IS_AMBIG(node)) {
4980 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4981 is_raw, env->enc);
4982 opt->exb.ignore_case = 0;
4983 if (slen > 0) {
4984 add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
4985 }
4986 set_mml(&opt->len, slen, slen);
4987 }
4988 else {
4989 OnigDistance max;
4990
4991 if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
4992 int n = onigenc_strlen(env->enc, sn->s, sn->end);
4993 max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
4994 }
4995 else {
4996 concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
4997 is_raw, env->enc);
4998 opt->exb.ignore_case = 1;
4999
5000 if (slen > 0) {
5001 r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
5002 env->enc, env->case_fold_flag);
5003 if (r != 0) break;
5004 }
5005
5006 max = slen;
5007 }
5008
5009 set_mml(&opt->len, slen, max);
5010 }
5011
5012 if ((OnigDistance )opt->exb.len == slen)
5013 opt->exb.reach_end = 1;
5014 }
5015 break;
5016
5017 case NT_CCLASS:
5018 {
5019 int i, z;
5020 CClassNode* cc = NCCLASS(node);
5021
5022 /* no need to check ignore case. (set in setup_tree()) */
5023
5024 if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
5025 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5026 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5027
5028 set_mml(&opt->len, min, max);
5029 }
5030 else {
5031 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5032 z = BITSET_AT(cc->bs, i);
5033 if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
5034 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5035 }
5036 }
5037 set_mml(&opt->len, 1, 1);
5038 }
5039 }
5040 break;
5041
5042 case NT_CTYPE:
5043 {
5044 int i, min, max;
5045 int maxcode;
5046
5047 max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5048
5049 if (max == 1) {
5050 min = 1;
5051
5052 maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
5053 switch (NCTYPE(node)->ctype) {
5054 case ONIGENC_CTYPE_WORD:
5055 if (NCTYPE(node)->not != 0) {
5056 for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
5057 if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
5058 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5059 }
5060 }
5061 }
5062 else {
5063 for (i = 0; i < maxcode; i++) {
5064 if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
5065 add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
5066 }
5067 }
5068 }
5069 break;
5070 }
5071 }
5072 else {
5073 min = ONIGENC_MBC_MINLEN(env->enc);
5074 }
5075 set_mml(&opt->len, min, max);
5076 }
5077 break;
5078
5079 case NT_CANY:
5080 {
5081 OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
5082 OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
5083 set_mml(&opt->len, min, max);
5084 }
5085 break;
5086
5087 case NT_ANCHOR:
5088 switch (NANCHOR(node)->type) {
5089 case ANCHOR_BEGIN_BUF:
5090 case ANCHOR_BEGIN_POSITION:
5091 case ANCHOR_BEGIN_LINE:
5092 case ANCHOR_END_BUF:
5093 case ANCHOR_SEMI_END_BUF:
5094 case ANCHOR_END_LINE:
5095 case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
5096 case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
5097 add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
5098 break;
5099
5100 case ANCHOR_PREC_READ:
5101 {
5102 NodeOptInfo nopt;
5103
5104 r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
5105 if (r == 0) {
5106 if (nopt.exb.len > 0)
5107 copy_opt_exact_info(&opt->expr, &nopt.exb);
5108 else if (nopt.exm.len > 0)
5109 copy_opt_exact_info(&opt->expr, &nopt.exm);
5110
5111 opt->expr.reach_end = 0;
5112
5113 if (nopt.map.value > 0)
5114 copy_opt_map_info(&opt->map, &nopt.map);
5115 }
5116 }
5117 break;
5118
5119 case ANCHOR_LOOK_BEHIND_NOT:
5120 break;
5121 }
5122 break;
5123
5124 case NT_BREF:
5125 {
5126 int i;
5127 int* backs;
5128 OnigDistance min, max, tmin, tmax;
5129 Node** nodes = SCANENV_MEM_NODES(env->scan_env);
5130 BRefNode* br = NBREF(node);
5131
5132 if (br->state & NST_RECURSION) {
5133 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5134 break;
5135 }
5136 backs = BACKREFS_P(br);
5137 r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
5138 if (r != 0) break;
5139 r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
5140 if (r != 0) break;
5141 for (i = 1; i < br->back_num; i++) {
5142 r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
5143 if (r != 0) break;
5144 r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
5145 if (r != 0) break;
5146 if (min > tmin) min = tmin;
5147 if (max < tmax) max = tmax;
5148 }
5149 if (r == 0) set_mml(&opt->len, min, max);
5150 }
5151 break;
5152
5153#ifdef USE_SUBEXP_CALL
5154 case NT_CALL:
5155 if (IS_CALL_RECURSION(NCALL(node)))
5156 set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
5157 else {
5158 OnigOptionType save = env->options;
5159 env->options = NENCLOSE(NCALL(node)->target)->option;
5160 r = optimize_node_left(NCALL(node)->target, opt, env);
5161 env->options = save;
5162 }
5163 break;
5164#endif
5165
5166 case NT_QTFR:
5167 {
5168 int i;
5169 OnigDistance min, max;
5170 NodeOptInfo nopt;
5171 QtfrNode* qn = NQTFR(node);
5172
5173 r = optimize_node_left(qn->target, &nopt, env);
5174 if (r) break;
5175
5176 if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
5177 if (env->mmd.max == 0 &&
5178 NTYPE(qn->target) == NT_CANY && qn->greedy) {
5179 if (IS_MULTILINE(env->options))
5180 /* implicit anchor: /.*a/ ==> /\A.*a/ */
5181 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
5182 else
5183 add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
5184 }
5185 }
5186 else {
5187 if (qn->lower > 0) {
5188 copy_node_opt_info(opt, &nopt);
5189 if (nopt.exb.len > 0) {
5190 if (nopt.exb.reach_end) {
5191 for (i = 2; i <= qn->lower &&
5192 ! is_full_opt_exact_info(&opt->exb); i++) {
5193 concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
5194 }
5195 if (i < qn->lower) {
5196 opt->exb.reach_end = 0;
5197 }
5198 }
5199 }
5200
5201 if (qn->lower != qn->upper) {
5202 opt->exb.reach_end = 0;
5203 opt->exm.reach_end = 0;
5204 }
5205 if (qn->lower > 1)
5206 opt->exm.reach_end = 0;
5207 }
5208 }
5209
5210 min = distance_multiply(nopt.len.min, qn->lower);
5211 if (IS_REPEAT_INFINITE(qn->upper))
5212 max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
5213 else
5214 max = distance_multiply(nopt.len.max, qn->upper);
5215
5216 set_mml(&opt->len, min, max);
5217 }
5218 break;
5219
5220 case NT_ENCLOSE:
5221 {
5222 EncloseNode* en = NENCLOSE(node);
5223
5224 switch (en->type) {
5225 case ENCLOSE_OPTION:
5226 {
5227 OnigOptionType save = env->options;
5228
5229 env->options = en->option;
5230 r = optimize_node_left(en->target, opt, env);
5231 env->options = save;
5232 }
5233 break;
5234
5235 case ENCLOSE_MEMORY:
5236#ifdef USE_SUBEXP_CALL
5237 en->opt_count++;
5238 if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
5239 OnigDistance min, max;
5240
5241 min = 0;
5242 max = ONIG_INFINITE_DISTANCE;
5243 if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
5244 if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
5245 set_mml(&opt->len, min, max);
5246 }
5247 else
5248#endif
5249 {
5250 r = optimize_node_left(en->target, opt, env);
5251
5252 if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
5253 if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
5254 remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
5255 }
5256 }
5257 break;
5258
5259 case ENCLOSE_STOP_BACKTRACK:
5260 case ENCLOSE_CONDITION:
5261 r = optimize_node_left(en->target, opt, env);
5262 break;
5263 }
5264 }
5265 break;
5266
5267 default:
5268#ifdef ONIG_DEBUG
5269 fprintf(stderr, "optimize_node_left: undefined node type %d\n",
5270 NTYPE(node));
5271#endif
5272 r = ONIGERR_TYPE_BUG;
5273 break;
5274 }
5275
5276 return r;
5277}
5278
5279static int
5280set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
5281{
5282 int r;
5283 int allow_reverse;
5284
5285 if (e->len == 0) return 0;
5286
5287 reg->exact = (UChar* )xmalloc(e->len);
5288 CHECK_NULL_RETURN_MEMERR(reg->exact);
5289 xmemcpy(reg->exact, e->s, e->len);
5290 reg->exact_end = reg->exact + e->len;
5291
5292 allow_reverse =
5293 ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
5294
5295 if (e->ignore_case > 0) {
5296 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5297 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5298 reg->map, &(reg->int_map), 1);
5299 if (r == 0) {
5300 reg->optimize = (allow_reverse != 0
5301 ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
5302 }
5303 else {
5304 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5305 }
5306 }
5307 else {
5308 reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
5309 }
5310 }
5311 else {
5312 if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
5313 r = set_bm_skip(reg->exact, reg->exact_end, reg,
5314 reg->map, &(reg->int_map), 0);
5315 if (r) return r;
5316
5317 reg->optimize = (allow_reverse != 0
5318 ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
5319 }
5320 else {
5321 reg->optimize = ONIG_OPTIMIZE_EXACT;
5322 }
5323 }
5324
5325 reg->dmin = e->mmd.min;
5326 reg->dmax = e->mmd.max;
5327
5328 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5329 reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
5330 }
5331
5332 return 0;
5333}
5334
5335static void
5336set_optimize_map_info(regex_t* reg, OptMapInfo* m)
5337{
5338 int i;
5339
5340 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5341 reg->map[i] = m->map[i];
5342
5343 reg->optimize = ONIG_OPTIMIZE_MAP;
5344 reg->dmin = m->mmd.min;
5345 reg->dmax = m->mmd.max;
5346
5347 if (reg->dmin != ONIG_INFINITE_DISTANCE) {
5348 reg->threshold_len = (int )(reg->dmin + 1);
5349 }
5350}
5351
5352static void
5353set_sub_anchor(regex_t* reg, OptAncInfo* anc)
5354{
5355 reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
5356 reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
5357}
5358
5359#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5360static void print_optimize_info(FILE* f, regex_t* reg);
5361#endif
5362
5363static int
5364set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
5365{
5366
5367 int r;
5368 NodeOptInfo opt;
5369 OptEnv env;
5370
5371 env.enc = reg->enc;
5372 env.options = reg->options;
5373 env.case_fold_flag = reg->case_fold_flag;
5374 env.scan_env = scan_env;
5375 clear_mml(&env.mmd);
5376
5377 r = optimize_node_left(node, &opt, &env);
5378 if (r) return r;
5379
5380 reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
5381 ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
5382 ANCHOR_LOOK_BEHIND);
5383
5384 reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
5385 ANCHOR_PREC_READ_NOT);
5386
5387 if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
5388 reg->anchor_dmin = opt.len.min;
5389 reg->anchor_dmax = opt.len.max;
5390 }
5391
5392 if (opt.exb.len > 0 || opt.exm.len > 0) {
5393 select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
5394 if (opt.map.value > 0 &&
5395 comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
5396 goto set_map;
5397 }
5398 else {
5399 r = set_optimize_exact_info(reg, &opt.exb);
5400 set_sub_anchor(reg, &opt.exb.anc);
5401 }
5402 }
5403 else if (opt.map.value > 0) {
5404 set_map:
5405 set_optimize_map_info(reg, &opt.map);
5406 set_sub_anchor(reg, &opt.map.anc);
5407 }
5408 else {
5409 reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
5410 if (opt.len.max == 0)
5411 reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
5412 }
5413
5414#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5415 print_optimize_info(stderr, reg);
5416#endif
5417 return r;
5418}
5419
5420static void
5421clear_optimize_info(regex_t* reg)
5422{
5423 reg->optimize = ONIG_OPTIMIZE_NONE;
5424 reg->anchor = 0;
5425 reg->anchor_dmin = 0;
5426 reg->anchor_dmax = 0;
5427 reg->sub_anchor = 0;
5428 reg->exact_end = (UChar* )NULL;
5429 reg->threshold_len = 0;
5430 if (IS_NOT_NULL(reg->exact)) {
5431 xfree(reg->exact);
5432 reg->exact = (UChar* )NULL;
5433 }
5434}
5435
5436#ifdef ONIG_DEBUG
5437
5438static void print_enc_string(FILE* fp, OnigEncoding enc,
5439 const UChar *s, const UChar *end)
5440{
5441 fprintf(fp, "\nPATTERN: /");
5442
5443 if (ONIGENC_MBC_MINLEN(enc) > 1) {
5444 const UChar *p;
5445 OnigCodePoint code;
5446
5447 p = s;
5448 while (p < end) {
5449 code = ONIGENC_MBC_TO_CODE(enc, p, end);
5450 if (code >= 0x80) {
5451 fprintf(fp, " 0x%04x ", (int )code);
5452 }
5453 else {
5454 fputc((int )code, fp);
5455 }
5456
5457 p += enclen(enc, p);
5458 }
5459 }
5460 else {
5461 while (s < end) {
5462 fputc((int )*s, fp);
5463 s++;
5464 }
5465 }
5466
5467 fprintf(fp, "/ (%s)\n", enc->name);
5468}
5469#endif /* ONIG_DEBUG */
5470
5471#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
5472static void
5473print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
5474{
5475 if (a == ONIG_INFINITE_DISTANCE)
5476 fputs("inf", f);
5477 else
5478 fprintf(f, "(%"PRIuPTR")", a);
5479
5480 fputs("-", f);
5481
5482 if (b == ONIG_INFINITE_DISTANCE)
5483 fputs("inf", f);
5484 else
5485 fprintf(f, "(%"PRIuPTR")", b);
5486}
5487
5488static void
5489print_anchor(FILE* f, int anchor)
5490{
5491 int q = 0;
5492
5493 fprintf(f, "[");
5494
5495 if (anchor & ANCHOR_BEGIN_BUF) {
5496 fprintf(f, "begin-buf");
5497 q = 1;
5498 }
5499 if (anchor & ANCHOR_BEGIN_LINE) {
5500 if (q) fprintf(f, ", ");
5501 q = 1;
5502 fprintf(f, "begin-line");
5503 }
5504 if (anchor & ANCHOR_BEGIN_POSITION) {
5505 if (q) fprintf(f, ", ");
5506 q = 1;
5507 fprintf(f, "begin-pos");
5508 }
5509 if (anchor & ANCHOR_END_BUF) {
5510 if (q) fprintf(f, ", ");
5511 q = 1;
5512 fprintf(f, "end-buf");
5513 }
5514 if (anchor & ANCHOR_SEMI_END_BUF) {
5515 if (q) fprintf(f, ", ");
5516 q = 1;
5517 fprintf(f, "semi-end-buf");
5518 }
5519 if (anchor & ANCHOR_END_LINE) {
5520 if (q) fprintf(f, ", ");
5521 q = 1;
5522 fprintf(f, "end-line");
5523 }
5524 if (anchor & ANCHOR_ANYCHAR_STAR) {
5525 if (q) fprintf(f, ", ");
5526 q = 1;
5527 fprintf(f, "anychar-star");
5528 }
5529 if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
5530 if (q) fprintf(f, ", ");
5531 fprintf(f, "anychar-star-ml");
5532 }
5533
5534 fprintf(f, "]");
5535}
5536
5537static void
5538print_optimize_info(FILE* f, regex_t* reg)
5539{
5540 static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
5541 "EXACT_IC", "MAP",
5542 "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
5543
5544 fprintf(f, "optimize: %s\n", on[reg->optimize]);
5545 fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
5546 if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
5547 print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
5548 fprintf(f, "\n");
5549
5550 if (reg->optimize) {
5551 fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
5552 fprintf(f, "\n");
5553 }
5554 fprintf(f, "\n");
5555
5556 if (reg->exact) {
5557 UChar *p;
5558 fprintf(f, "exact: [");
5559 for (p = reg->exact; p < reg->exact_end; p++) {
5560 fputc(*p, f);
5561 }
5562 fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
5563 }
5564 else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
5565 int c, i, n = 0;
5566
5567 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
5568 if (reg->map[i]) n++;
5569
5570 fprintf(f, "map: n=%d\n", n);
5571 if (n > 0) {
5572 c = 0;
5573 fputc('[', f);
5574 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
5575 if (reg->map[i] != 0) {
5576 if (c > 0) fputs(", ", f);
5577 c++;
5578 if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
5579 ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
5580 fputc(i, f);
5581 else
5582 fprintf(f, "%d", i);
5583 }
5584 }
5585 fprintf(f, "]\n");
5586 }
5587 }
5588}
5589#endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
5590
5591
5592extern void
5593onig_free_body(regex_t* reg)
5594{
5595 if (IS_NOT_NULL(reg)) {
5596 if (IS_NOT_NULL(reg->p)) xfree(reg->p);
5597 if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
5598 if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
5599 if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
5600 if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
5601 if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
5602
5603#ifdef USE_NAMED_GROUP
5604 onig_names_free(reg);
5605#endif
5606 }
5607}
5608
5609extern void
5610onig_free(regex_t* reg)
5611{
5612 if (IS_NOT_NULL(reg)) {
5613 onig_free_body(reg);
5614 xfree(reg);
5615 }
5616}
5617
5618size_t
5619onig_memsize(const regex_t *reg)
5620{
5621 size_t size = sizeof(regex_t);
5622 if (IS_NULL(reg)) return 0;
5623 if (IS_NOT_NULL(reg->p)) size += reg->alloc;
5624 if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
5625 if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5626 if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
5627 if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
5628 if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
5629
5630 return size;
5631}
5632
5633size_t
5634onig_region_memsize(const OnigRegion *regs)
5635{
5636 size_t size = sizeof(*regs);
5637 if (IS_NULL(regs)) return 0;
5638 size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
5639 return size;
5640}
5641
5642#define REGEX_TRANSFER(to,from) do {\
5643 (to)->state = ONIG_STATE_MODIFY;\
5644 onig_free_body(to);\
5645 xmemcpy(to, from, sizeof(regex_t));\
5646 xfree(from);\
5647} while (0)
5648
5649extern void
5650onig_transfer(regex_t* to, regex_t* from)
5651{
5652 THREAD_ATOMIC_START;
5653 REGEX_TRANSFER(to, from);
5654 THREAD_ATOMIC_END;
5655}
5656
5657#define REGEX_CHAIN_HEAD(reg) do {\
5658 while (IS_NOT_NULL((reg)->chain)) {\
5659 (reg) = (reg)->chain;\
5660 }\
5661} while (0)
5662
5663extern void
5664onig_chain_link_add(regex_t* to, regex_t* add)
5665{
5666 THREAD_ATOMIC_START;
5667 REGEX_CHAIN_HEAD(to);
5668 to->chain = add;
5669 THREAD_ATOMIC_END;
5670}
5671
5672extern void
5673onig_chain_reduce(regex_t* reg)
5674{
5675 regex_t *head, *prev;
5676
5677 prev = reg;
5678 head = prev->chain;
5679 if (IS_NOT_NULL(head)) {
5680 reg->state = ONIG_STATE_MODIFY;
5681 while (IS_NOT_NULL(head->chain)) {
5682 prev = head;
5683 head = head->chain;
5684 }
5685 prev->chain = (regex_t* )NULL;
5686 REGEX_TRANSFER(reg, head);
5687 }
5688}
5689
5690#ifdef ONIG_DEBUG_COMPILE
5691static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
5692#endif
5693#ifdef ONIG_DEBUG_PARSE_TREE
5694static void print_tree P_((FILE* f, Node* node));
5695#endif
5696
5697extern int
5698onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5699 OnigErrorInfo* einfo)
5700{
5701#define COMPILE_INIT_SIZE 20
5702
5703 int r;
5704 OnigDistance init_size;
5705 Node* root;
5706 ScanEnv scan_env = {0};
5707#ifdef USE_SUBEXP_CALL
5708 UnsetAddrList uslist;
5709#endif
5710
5711 if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
5712
5713 reg->state = ONIG_STATE_COMPILING;
5714
5715#ifdef ONIG_DEBUG
5716 print_enc_string(stderr, reg->enc, pattern, pattern_end);
5717#endif
5718
5719 if (reg->alloc == 0) {
5720 init_size = (pattern_end - pattern) * 2;
5721 if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
5722 r = BBUF_INIT(reg, init_size);
5723 if (r != 0) goto end;
5724 }
5725 else
5726 reg->used = 0;
5727
5728 reg->num_mem = 0;
5729 reg->num_repeat = 0;
5730 reg->num_null_check = 0;
5731 reg->repeat_range_alloc = 0;
5732 reg->repeat_range = (OnigRepeatRange* )NULL;
5733#ifdef USE_COMBINATION_EXPLOSION_CHECK
5734 reg->num_comb_exp_check = 0;
5735#endif
5736
5737 r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
5738 if (r != 0) goto err;
5739
5740#ifdef USE_NAMED_GROUP
5741 /* mixed use named group and no-named group */
5742 if (scan_env.num_named > 0 &&
5743 IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
5744 !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
5745 if (scan_env.num_named != scan_env.num_mem)
5746 r = disable_noname_group_capture(&root, reg, &scan_env);
5747 else
5748 r = numbered_ref_check(root);
5749
5750 if (r != 0) goto err;
5751 }
5752#endif
5753
5754#ifdef USE_SUBEXP_CALL
5755 if (scan_env.num_call > 0) {
5756 r = unset_addr_list_init(&uslist, scan_env.num_call);
5757 if (r != 0) goto err;
5758 scan_env.unset_addr_list = &uslist;
5759 r = setup_subexp_call(root, &scan_env);
5760 if (r != 0) goto err_unset;
5761 r = subexp_recursive_check_trav(root, &scan_env);
5762 if (r < 0) goto err_unset;
5763 r = subexp_inf_recursive_check_trav(root, &scan_env);
5764 if (r != 0) goto err_unset;
5765
5766 reg->num_call = scan_env.num_call;
5767 }
5768 else
5769 reg->num_call = 0;
5770#endif
5771
5772 r = setup_tree(root, reg, IN_ROOT, &scan_env);
5773 if (r != 0) goto err_unset;
5774
5775#ifdef ONIG_DEBUG_PARSE_TREE
5776 print_tree(stderr, root);
5777#endif
5778
5779 reg->capture_history = scan_env.capture_history;
5780 reg->bt_mem_start = scan_env.bt_mem_start;
5781 reg->bt_mem_start |= reg->capture_history;
5782 if (IS_FIND_CONDITION(reg->options))
5783 BIT_STATUS_ON_ALL(reg->bt_mem_end);
5784 else {
5785 reg->bt_mem_end = scan_env.bt_mem_end;
5786 reg->bt_mem_end |= reg->capture_history;
5787 }
5788
5789#ifdef USE_COMBINATION_EXPLOSION_CHECK
5790 if (scan_env.backrefed_mem == 0
5791#ifdef USE_SUBEXP_CALL
5792 || scan_env.num_call == 0
5793#endif
5794 ) {
5795 setup_comb_exp_check(root, 0, &scan_env);
5796#ifdef USE_SUBEXP_CALL
5797 if (scan_env.has_recursion != 0) {
5798 scan_env.num_comb_exp_check = 0;
5799 }
5800 else
5801#endif
5802 if (scan_env.comb_exp_max_regnum > 0) {
5803 int i;
5804 for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
5805 if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
5806 scan_env.num_comb_exp_check = 0;
5807 break;
5808 }
5809 }
5810 }
5811 }
5812
5813 reg->num_comb_exp_check = scan_env.num_comb_exp_check;
5814#endif
5815
5816 clear_optimize_info(reg);
5817#ifndef ONIG_DONT_OPTIMIZE
5818 r = set_optimize_info_from_tree(root, reg, &scan_env);
5819 if (r != 0) goto err_unset;
5820#endif
5821
5822 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
5823 xfree(scan_env.mem_nodes_dynamic);
5824 scan_env.mem_nodes_dynamic = (Node** )NULL;
5825 }
5826
5827 r = compile_tree(root, reg);
5828 if (r == 0) {
5829 r = add_opcode(reg, OP_END);
5830#ifdef USE_SUBEXP_CALL
5831 if (scan_env.num_call > 0) {
5832 r = unset_addr_list_fix(&uslist, reg);
5833 unset_addr_list_end(&uslist);
5834 if (r) goto err;
5835 }
5836#endif
5837
5838 if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
5839 reg->stack_pop_level = STACK_POP_LEVEL_ALL;
5840 else {
5841 if (reg->bt_mem_start != 0)
5842 reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
5843 else
5844 reg->stack_pop_level = STACK_POP_LEVEL_FREE;
5845 }
5846 }
5847#ifdef USE_SUBEXP_CALL
5848 else if (scan_env.num_call > 0) {
5849 unset_addr_list_end(&uslist);
5850 }
5851#endif
5852 onig_node_free(root);
5853
5854#ifdef ONIG_DEBUG_COMPILE
5855#ifdef USE_NAMED_GROUP
5856 onig_print_names(stderr, reg);
5857#endif
5858 print_compiled_byte_code_list(stderr, reg);
5859#endif
5860
5861 end:
5862 reg->state = ONIG_STATE_NORMAL;
5863 return r;
5864
5865 err_unset:
5866#ifdef USE_SUBEXP_CALL
5867 if (scan_env.num_call > 0) {
5868 unset_addr_list_end(&uslist);
5869 }
5870#endif
5871 err:
5872 if (IS_NOT_NULL(scan_env.error)) {
5873 if (IS_NOT_NULL(einfo)) {
5874 einfo->enc = scan_env.enc;
5875 einfo->par = scan_env.error;
5876 einfo->par_end = scan_env.error_end;
5877 }
5878 }
5879
5880 onig_node_free(root);
5881 if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
5882 xfree(scan_env.mem_nodes_dynamic);
5883 return r;
5884}
5885
5886#ifdef USE_RECOMPILE_API
5887extern int
5888onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
5889 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5890 OnigErrorInfo* einfo)
5891{
5892 int r;
5893 regex_t *new_reg;
5894
5895 r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
5896 if (r) return r;
5897 if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
5898 onig_transfer(reg, new_reg);
5899 }
5900 else {
5901 onig_chain_link_add(reg, new_reg);
5902 }
5903 return 0;
5904}
5905#endif
5906
5907static int onig_inited = 0;
5908
5909extern int
5910onig_reg_init(regex_t* reg, OnigOptionType option,
5911 OnigCaseFoldType case_fold_flag,
5912 OnigEncoding enc, OnigSyntaxType* syntax)
5913{
5914 if (! onig_inited)
5915 onig_init();
5916
5917 if (IS_NULL(reg))
5918 return ONIGERR_INVALID_ARGUMENT;
5919
5920 if (ONIGENC_IS_UNDEF(enc))
5921 return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
5922
5923 if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
5924 == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
5925 return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
5926 }
5927
5928 (reg)->state = ONIG_STATE_MODIFY;
5929
5930 if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
5931 option |= syntax->options;
5932 option &= ~ONIG_OPTION_SINGLELINE;
5933 }
5934 else
5935 option |= syntax->options;
5936
5937 (reg)->enc = enc;
5938 (reg)->options = option;
5939 (reg)->syntax = syntax;
5940 (reg)->optimize = 0;
5941 (reg)->exact = (UChar* )NULL;
5942 (reg)->int_map = (int* )NULL;
5943 (reg)->int_map_backward = (int* )NULL;
5944 (reg)->chain = (regex_t* )NULL;
5945
5946 (reg)->p = (UChar* )NULL;
5947 (reg)->alloc = 0;
5948 (reg)->used = 0;
5949 (reg)->name_table = (void* )NULL;
5950
5951 (reg)->case_fold_flag = case_fold_flag;
5952 return 0;
5953}
5954
5955extern int
5956onig_new_without_alloc(regex_t* reg, const UChar* pattern,
5957 const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
5958 OnigSyntaxType* syntax, OnigErrorInfo* einfo)
5959{
5960 int r;
5961
5962 r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5963 if (r) return r;
5964
5965 r = onig_compile(reg, pattern, pattern_end, einfo);
5966 return r;
5967}
5968
5969extern int
5970onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
5971 OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
5972 OnigErrorInfo* einfo)
5973{
5974 int r;
5975
5976 *reg = (regex_t* )xmalloc(sizeof(regex_t));
5977 if (IS_NULL(*reg)) return ONIGERR_MEMORY;
5978
5979 r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
5980 if (r) goto err;
5981
5982 r = onig_compile(*reg, pattern, pattern_end, einfo);
5983 if (r) {
5984 err:
5985 onig_free(*reg);
5986 *reg = NULL;
5987 }
5988 return r;
5989}
5990
5991
5992extern int
5993onig_init(void)
5994{
5995 if (onig_inited != 0)
5996 return 0;
5997
5998 THREAD_SYSTEM_INIT;
5999 THREAD_ATOMIC_START;
6000
6001 onig_inited = 1;
6002
6003 onigenc_init();
6004 /* onigenc_set_default_caseconv_table((UChar* )0); */
6005
6006#ifdef ONIG_DEBUG_STATISTICS
6007 onig_statistics_init();
6008#endif
6009
6010 THREAD_ATOMIC_END;
6011 return 0;
6012}
6013
6014
6015static OnigEndCallListItemType* EndCallTop;
6016
6017extern void onig_add_end_call(void (*func)(void))
6018{
6019 OnigEndCallListItemType* item;
6020
6021 item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
6022 if (item == 0) return ;
6023
6024 item->next = EndCallTop;
6025 item->func = func;
6026
6027 EndCallTop = item;
6028}
6029
6030static void
6031exec_end_call_list(void)
6032{
6033 OnigEndCallListItemType* prev;
6034 void (*func)(void);
6035
6036 while (EndCallTop != 0) {
6037 func = EndCallTop->func;
6038 (*func)();
6039
6040 prev = EndCallTop;
6041 EndCallTop = EndCallTop->next;
6042 xfree(prev);
6043 }
6044}
6045
6046extern int
6047onig_end(void)
6048{
6049 THREAD_ATOMIC_START;
6050
6051 exec_end_call_list();
6052
6053#ifdef ONIG_DEBUG_STATISTICS
6054 onig_print_statistics(stderr);
6055#endif
6056
6057#ifdef USE_SHARED_CCLASS_TABLE
6058 onig_free_shared_cclass_table();
6059#endif
6060
6061#ifdef USE_PARSE_TREE_NODE_RECYCLE
6062 onig_free_node_list();
6063#endif
6064
6065 onig_inited = 0;
6066
6067 THREAD_ATOMIC_END;
6068 THREAD_SYSTEM_END;
6069 return 0;
6070}
6071
6072extern int
6073onig_is_in_code_range(const UChar* p, OnigCodePoint code)
6074{
6075 OnigCodePoint n, *data;
6076 OnigCodePoint low, high, x;
6077
6078 GET_CODE_POINT(n, p);
6079 data = (OnigCodePoint* )p;
6080 data++;
6081
6082 for (low = 0, high = n; low < high; ) {
6083 x = (low + high) >> 1;
6084 if (code > data[x * 2 + 1])
6085 low = x + 1;
6086 else
6087 high = x;
6088 }
6089
6090 return ((low < n && code >= data[low * 2]) ? 1 : 0);
6091}
6092
6093extern int
6094onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
6095{
6096 int found;
6097
6098 if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
6099 if (IS_NULL(cc->mbuf)) {
6100 found = 0;
6101 }
6102 else {
6103 found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
6104 }
6105 }
6106 else {
6107 found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
6108 }
6109
6110 if (IS_NCCLASS_NOT(cc))
6111 return !found;
6112 else
6113 return found;
6114}
6115
6116extern int
6117onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
6118{
6119 int len;
6120
6121 if (ONIGENC_MBC_MINLEN(enc) > 1) {
6122 len = 2;
6123 }
6124 else {
6125 len = ONIGENC_CODE_TO_MBCLEN(enc, code);
6126 }
6127 return onig_is_code_in_cc_len(len, code, cc);
6128}
6129
6130
6131#ifdef ONIG_DEBUG
6132
6133/* arguments type */
6134#define ARG_SPECIAL -1
6135#define ARG_NON 0
6136#define ARG_RELADDR 1
6137#define ARG_ABSADDR 2
6138#define ARG_LENGTH 3
6139#define ARG_MEMNUM 4
6140#define ARG_OPTION 5
6141#define ARG_STATE_CHECK 6
6142
6143OnigOpInfoType OnigOpInfo[] = {
6144 { OP_FINISH, "finish", ARG_NON },
6145 { OP_END, "end", ARG_NON },
6146 { OP_EXACT1, "exact1", ARG_SPECIAL },
6147 { OP_EXACT2, "exact2", ARG_SPECIAL },
6148 { OP_EXACT3, "exact3", ARG_SPECIAL },
6149 { OP_EXACT4, "exact4", ARG_SPECIAL },
6150 { OP_EXACT5, "exact5", ARG_SPECIAL },
6151 { OP_EXACTN, "exactn", ARG_SPECIAL },
6152 { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
6153 { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
6154 { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
6155 { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
6156 { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
6157 { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
6158 { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
6159 { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
6160 { OP_CCLASS, "cclass", ARG_SPECIAL },
6161 { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
6162 { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
6163 { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
6164 { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
6165 { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
6166 { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
6167 { OP_ANYCHAR, "anychar", ARG_NON },
6168 { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
6169 { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
6170 { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
6171 { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
6172 { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
6173 { OP_WORD, "word", ARG_NON },
6174 { OP_NOT_WORD, "not-word", ARG_NON },
6175 { OP_WORD_BOUND, "word-bound", ARG_NON },
6176 { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
6177 { OP_WORD_BEGIN, "word-begin", ARG_NON },
6178 { OP_WORD_END, "word-end", ARG_NON },
6179 { OP_ASCII_WORD, "ascii-word", ARG_NON },
6180 { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
6181 { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
6182 { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
6183 { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
6184 { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
6185 { OP_BEGIN_BUF, "begin-buf", ARG_NON },
6186 { OP_END_BUF, "end-buf", ARG_NON },
6187 { OP_BEGIN_LINE, "begin-line", ARG_NON },
6188 { OP_END_LINE, "end-line", ARG_NON },
6189 { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
6190 { OP_BEGIN_POSITION, "begin-position", ARG_NON },
6191 { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
6192 { OP_BACKREF1, "backref1", ARG_NON },
6193 { OP_BACKREF2, "backref2", ARG_NON },
6194 { OP_BACKREFN, "backrefn", ARG_MEMNUM },
6195 { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
6196 { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
6197 { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
6198 { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
6199 { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
6200 { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
6201 { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
6202 { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
6203 { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
6204 { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
6205 { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
6206 { OP_SET_OPTION, "set-option", ARG_OPTION },
6207 { OP_KEEP, "keep", ARG_NON },
6208 { OP_FAIL, "fail", ARG_NON },
6209 { OP_JUMP, "jump", ARG_RELADDR },
6210 { OP_PUSH, "push", ARG_RELADDR },
6211 { OP_POP, "pop", ARG_NON },
6212 { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
6213 { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
6214 { OP_REPEAT, "repeat", ARG_SPECIAL },
6215 { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
6216 { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
6217 { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
6218 { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
6219 { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
6220 { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
6221 { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
6222 { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
6223 { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
6224 { OP_PUSH_POS, "push-pos", ARG_NON },
6225 { OP_POP_POS, "pop-pos", ARG_NON },
6226 { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
6227 { OP_FAIL_POS, "fail-pos", ARG_NON },
6228 { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
6229 { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
6230 { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
6231 { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
6232 { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
6233 { OP_CALL, "call", ARG_ABSADDR },
6234 { OP_RETURN, "return", ARG_NON },
6235 { OP_CONDITION, "condition", ARG_SPECIAL },
6236 { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
6237 { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
6238 { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
6239 { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
6240 { OP_STATE_CHECK_ANYCHAR_ML_STAR,
6241 "state-check-anychar-ml*", ARG_STATE_CHECK },
6242 { -1, "", ARG_NON }
6243};
6244
6245static const char*
6246op2name(int opcode)
6247{
6248 int i;
6249
6250 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6251 if (opcode == OnigOpInfo[i].opcode)
6252 return OnigOpInfo[i].name;
6253 }
6254 return "";
6255}
6256
6257static int
6258op2arg_type(int opcode)
6259{
6260 int i;
6261
6262 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
6263 if (opcode == OnigOpInfo[i].opcode)
6264 return OnigOpInfo[i].arg_type;
6265 }
6266 return ARG_SPECIAL;
6267}
6268
6269#ifdef ONIG_DEBUG_PARSE_TREE
6270static void
6271Indent(FILE* f, int indent)
6272{
6273 int i;
6274 for (i = 0; i < indent; i++) putc(' ', f);
6275}
6276#endif /* ONIG_DEBUG_PARSE_TREE */
6277
6278static void
6279p_string(FILE* f, ptrdiff_t len, UChar* s)
6280{
6281 fputs(":", f);
6282 while (len-- > 0) { fputc(*s++, f); }
6283}
6284
6285static void
6286p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
6287{
6288 int x = len * mb_len;
6289
6290 fprintf(f, ":%d:", len);
6291 while (x-- > 0) { fputc(*s++, f); }
6292}
6293
6294extern void
6295onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
6296 OnigEncoding enc)
6297{
6298 int i, n, arg_type;
6299 RelAddrType addr;
6300 LengthType len;
6301 MemNumType mem;
6302 StateCheckNumType scn;
6303 OnigCodePoint code;
6304 UChar *q;
6305
6306 fprintf(f, "[%s", op2name(*bp));
6307 arg_type = op2arg_type(*bp);
6308 if (arg_type != ARG_SPECIAL) {
6309 bp++;
6310 switch (arg_type) {
6311 case ARG_NON:
6312 break;
6313 case ARG_RELADDR:
6314 GET_RELADDR_INC(addr, bp);
6315 fprintf(f, ":(+%d)", addr);
6316 break;
6317 case ARG_ABSADDR:
6318 GET_ABSADDR_INC(addr, bp);
6319 fprintf(f, ":(%d)", addr);
6320 break;
6321 case ARG_LENGTH:
6322 GET_LENGTH_INC(len, bp);
6323 fprintf(f, ":%d", len);
6324 break;
6325 case ARG_MEMNUM:
6326 mem = *((MemNumType* )bp);
6327 bp += SIZE_MEMNUM;
6328 fprintf(f, ":%d", mem);
6329 break;
6330 case ARG_OPTION:
6331 {
6332 OnigOptionType option = *((OnigOptionType* )bp);
6333 bp += SIZE_OPTION;
6334 fprintf(f, ":%d", option);
6335 }
6336 break;
6337
6338 case ARG_STATE_CHECK:
6339 scn = *((StateCheckNumType* )bp);
6340 bp += SIZE_STATE_CHECK_NUM;
6341 fprintf(f, ":%d", scn);
6342 break;
6343 }
6344 }
6345 else {
6346 switch (*bp++) {
6347 case OP_EXACT1:
6348 case OP_ANYCHAR_STAR_PEEK_NEXT:
6349 case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
6350 p_string(f, 1, bp++); break;
6351 case OP_EXACT2:
6352 p_string(f, 2, bp); bp += 2; break;
6353 case OP_EXACT3:
6354 p_string(f, 3, bp); bp += 3; break;
6355 case OP_EXACT4:
6356 p_string(f, 4, bp); bp += 4; break;
6357 case OP_EXACT5:
6358 p_string(f, 5, bp); bp += 5; break;
6359 case OP_EXACTN:
6360 GET_LENGTH_INC(len, bp);
6361 p_len_string(f, len, 1, bp);
6362 bp += len;
6363 break;
6364
6365 case OP_EXACTMB2N1:
6366 p_string(f, 2, bp); bp += 2; break;
6367 case OP_EXACTMB2N2:
6368 p_string(f, 4, bp); bp += 4; break;
6369 case OP_EXACTMB2N3:
6370 p_string(f, 6, bp); bp += 6; break;
6371 case OP_EXACTMB2N:
6372 GET_LENGTH_INC(len, bp);
6373 p_len_string(f, len, 2, bp);
6374 bp += len * 2;
6375 break;
6376 case OP_EXACTMB3N:
6377 GET_LENGTH_INC(len, bp);
6378 p_len_string(f, len, 3, bp);
6379 bp += len * 3;
6380 break;
6381 case OP_EXACTMBN:
6382 {
6383 int mb_len;
6384
6385 GET_LENGTH_INC(mb_len, bp);
6386 GET_LENGTH_INC(len, bp);
6387 fprintf(f, ":%d:%d:", mb_len, len);
6388 n = len * mb_len;
6389 while (n-- > 0) { fputc(*bp++, f); }
6390 }
6391 break;
6392
6393 case OP_EXACT1_IC:
6394 len = enclen(enc, bp);
6395 p_string(f, len, bp);
6396 bp += len;
6397 break;
6398 case OP_EXACTN_IC:
6399 GET_LENGTH_INC(len, bp);
6400 p_len_string(f, len, 1, bp);
6401 bp += len;
6402 break;
6403
6404 case OP_CCLASS:
6405 n = bitset_on_num((BitSetRef )bp);
6406 bp += SIZE_BITSET;
6407 fprintf(f, ":%d", n);
6408 break;
6409
6410 case OP_CCLASS_NOT:
6411 n = bitset_on_num((BitSetRef )bp);
6412 bp += SIZE_BITSET;
6413 fprintf(f, ":%d", n);
6414 break;
6415
6416 case OP_CCLASS_MB:
6417 case OP_CCLASS_MB_NOT:
6418 GET_LENGTH_INC(len, bp);
6419 q = bp;
6420#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6421 ALIGNMENT_RIGHT(q);
6422#endif
6423 GET_CODE_POINT(code, q);
6424 bp += len;
6425 fprintf(f, ":%d:%d", (int )code, len);
6426 break;
6427
6428 case OP_CCLASS_MIX:
6429 case OP_CCLASS_MIX_NOT:
6430 n = bitset_on_num((BitSetRef )bp);
6431 bp += SIZE_BITSET;
6432 GET_LENGTH_INC(len, bp);
6433 q = bp;
6434#ifndef PLATFORM_UNALIGNED_WORD_ACCESS
6435 ALIGNMENT_RIGHT(q);
6436#endif
6437 GET_CODE_POINT(code, q);
6438 bp += len;
6439 fprintf(f, ":%d:%d:%d", n, (int )code, len);
6440 break;
6441
6442 case OP_CCLASS_NODE:
6443 {
6444 CClassNode *cc;
6445
6446 GET_POINTER_INC(cc, bp);
6447 n = bitset_on_num(cc->bs);
6448 fprintf(f, ":%"PRIuPTR":%d", (uintptr_t )cc, n);
6449 }
6450 break;
6451
6452 case OP_BACKREFN_IC:
6453 mem = *((MemNumType* )bp);
6454 bp += SIZE_MEMNUM;
6455 fprintf(f, ":%d", mem);
6456 break;
6457
6458 case OP_BACKREF_MULTI_IC:
6459 case OP_BACKREF_MULTI:
6460 fputs(" ", f);
6461 GET_LENGTH_INC(len, bp);
6462 for (i = 0; i < len; i++) {
6463 GET_MEMNUM_INC(mem, bp);
6464 if (i > 0) fputs(", ", f);
6465 fprintf(f, "%d", mem);
6466 }
6467 break;
6468
6469 case OP_BACKREF_WITH_LEVEL:
6470 {
6471 OnigOptionType option;
6472 LengthType level;
6473
6474 GET_OPTION_INC(option, bp);
6475 fprintf(f, ":%d", option);
6476 GET_LENGTH_INC(level, bp);
6477 fprintf(f, ":%d", level);
6478
6479 fputs(" ", f);
6480 GET_LENGTH_INC(len, bp);
6481 for (i = 0; i < len; i++) {
6482 GET_MEMNUM_INC(mem, bp);
6483 if (i > 0) fputs(", ", f);
6484 fprintf(f, "%d", mem);
6485 }
6486 }
6487 break;
6488
6489 case OP_REPEAT:
6490 case OP_REPEAT_NG:
6491 {
6492 mem = *((MemNumType* )bp);
6493 bp += SIZE_MEMNUM;
6494 addr = *((RelAddrType* )bp);
6495 bp += SIZE_RELADDR;
6496 fprintf(f, ":%d:%d", mem, addr);
6497 }
6498 break;
6499
6500 case OP_PUSH_OR_JUMP_EXACT1:
6501 case OP_PUSH_IF_PEEK_NEXT:
6502 addr = *((RelAddrType* )bp);
6503 bp += SIZE_RELADDR;
6504 fprintf(f, ":(%d)", addr);
6505 p_string(f, 1, bp);
6506 bp += 1;
6507 break;
6508
6509 case OP_LOOK_BEHIND:
6510 GET_LENGTH_INC(len, bp);
6511 fprintf(f, ":%d", len);
6512 break;
6513
6514 case OP_PUSH_LOOK_BEHIND_NOT:
6515 GET_RELADDR_INC(addr, bp);
6516 GET_LENGTH_INC(len, bp);
6517 fprintf(f, ":%d:(%d)", len, addr);
6518 break;
6519
6520 case OP_STATE_CHECK_PUSH:
6521 case OP_STATE_CHECK_PUSH_OR_JUMP:
6522 scn = *((StateCheckNumType* )bp);
6523 bp += SIZE_STATE_CHECK_NUM;
6524 addr = *((RelAddrType* )bp);
6525 bp += SIZE_RELADDR;
6526 fprintf(f, ":%d:(%d)", scn, addr);
6527 break;
6528
6529 case OP_CONDITION:
6530 GET_MEMNUM_INC(mem, bp);
6531 GET_RELADDR_INC(addr, bp);
6532 fprintf(f, ":%d:(%d)", mem, addr);
6533 break;
6534
6535 default:
6536 fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
6537 *--bp);
6538 }
6539 }
6540 fputs("]", f);
6541 if (nextp) *nextp = bp;
6542}
6543
6544#ifdef ONIG_DEBUG_COMPILE
6545static void
6546print_compiled_byte_code_list(FILE* f, regex_t* reg)
6547{
6548 int ncode;
6549 UChar* bp = reg->p;
6550 UChar* end = reg->p + reg->used;
6551
6552 fprintf(f, "code length: %d", reg->used);
6553
6554 ncode = -1;
6555 while (bp < end) {
6556 ncode++;
6557 if (ncode % 5 == 0)
6558 fprintf(f, "\n%ld:", bp - reg->p);
6559 else
6560 fprintf(f, " %ld:", bp - reg->p);
6561 onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
6562 }
6563
6564 fprintf(f, "\n");
6565}
6566#endif /* ONIG_DEBUG_COMPILE */
6567
6568#ifdef ONIG_DEBUG_PARSE_TREE
6569static void
6570print_indent_tree(FILE* f, Node* node, int indent)
6571{
6572 int i, type, container_p = 0;
6573 int add = 3;
6574 UChar* p;
6575
6576 Indent(f, indent);
6577 if (IS_NULL(node)) {
6578 fprintf(f, "ERROR: null node!!!\n");
6579 exit (0);
6580 }
6581
6582 type = NTYPE(node);
6583 switch (type) {
6584 case NT_LIST:
6585 case NT_ALT:
6586 if (NTYPE(node) == NT_LIST)
6587 fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
6588 else
6589 fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
6590
6591 print_indent_tree(f, NCAR(node), indent + add);
6592 while (IS_NOT_NULL(node = NCDR(node))) {
6593 if (NTYPE(node) != type) {
6594 fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
6595 exit(0);
6596 }
6597 print_indent_tree(f, NCAR(node), indent + add);
6598 }
6599 break;
6600
6601 case NT_STR:
6602 fprintf(f, "<string%s:%"PRIxPTR">",
6603 (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
6604 for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
6605 if (*p >= 0x20 && *p < 0x7f)
6606 fputc(*p, f);
6607 else {
6608 fprintf(f, " 0x%02x", *p);
6609 }
6610 }
6611 break;
6612
6613 case NT_CCLASS:
6614 fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
6615 if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
6616 if (NCCLASS(node)->mbuf) {
6617 BBuf* bbuf = NCCLASS(node)->mbuf;
6618 for (i = 0; i < (int )bbuf->used; i++) {
6619 if (i > 0) fprintf(f, ",");
6620 fprintf(f, "%0x", bbuf->p[i]);
6621 }
6622 }
6623 break;
6624
6625 case NT_CTYPE:
6626 fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
6627 switch (NCTYPE(node)->ctype) {
6628 case ONIGENC_CTYPE_WORD:
6629 if (NCTYPE(node)->not != 0)
6630 fputs("not word", f);
6631 else
6632 fputs("word", f);
6633 break;
6634
6635 default:
6636 fprintf(f, "ERROR: undefined ctype.\n");
6637 exit(0);
6638 }
6639 break;
6640
6641 case NT_CANY:
6642 fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
6643 break;
6644
6645 case NT_ANCHOR:
6646 fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
6647 switch (NANCHOR(node)->type) {
6648 case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
6649 case ANCHOR_END_BUF: fputs("end buf", f); break;
6650 case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
6651 case ANCHOR_END_LINE: fputs("end line", f); break;
6652 case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
6653 case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
6654 case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
6655
6656 case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
6657 case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
6658#ifdef USE_WORD_BEGIN_END
6659 case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
6660 case ANCHOR_WORD_END: fputs("word end", f); break;
6661#endif
6662 case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
6663 case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
6664 case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
6665 case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
6666 case ANCHOR_KEEP: fputs("keep",f); break;
6667
6668 default:
6669 fprintf(f, "ERROR: undefined anchor type.\n");
6670 break;
6671 }
6672 break;
6673
6674 case NT_BREF:
6675 {
6676 int* p;
6677 BRefNode* br = NBREF(node);
6678 p = BACKREFS_P(br);
6679 fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
6680 for (i = 0; i < br->back_num; i++) {
6681 if (i > 0) fputs(", ", f);
6682 fprintf(f, "%d", p[i]);
6683 }
6684 }
6685 break;
6686
6687#ifdef USE_SUBEXP_CALL
6688 case NT_CALL:
6689 {
6690 CallNode* cn = NCALL(node);
6691 fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
6692 p_string(f, cn->name_end - cn->name, cn->name);
6693 }
6694 break;
6695#endif
6696
6697 case NT_QTFR:
6698 fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
6699 NQTFR(node)->lower, NQTFR(node)->upper,
6700 (NQTFR(node)->greedy ? "" : "?"));
6701 print_indent_tree(f, NQTFR(node)->target, indent + add);
6702 break;
6703
6704 case NT_ENCLOSE:
6705 fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
6706 switch (NENCLOSE(node)->type) {
6707 case ENCLOSE_OPTION:
6708 fprintf(f, "option:%d", NENCLOSE(node)->option);
6709 break;
6710 case ENCLOSE_MEMORY:
6711 fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
6712 break;
6713 case ENCLOSE_STOP_BACKTRACK:
6714 fprintf(f, "stop-bt");
6715 break;
6716 case ENCLOSE_CONDITION:
6717 fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
6718 break;
6719
6720 default:
6721 break;
6722 }
6723 fprintf(f, "\n");
6724 print_indent_tree(f, NENCLOSE(node)->target, indent + add);
6725 break;
6726
6727 default:
6728 fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
6729 break;
6730 }
6731
6732 if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
6733 type != NT_ENCLOSE)
6734 fprintf(f, "\n");
6735
6736 if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
6737
6738 fflush(f);
6739}
6740
6741static void
6742print_tree(FILE* f, Node* node)
6743{
6744 print_indent_tree(f, node, 0);
6745}
6746#endif /* ONIG_DEBUG_PARSE_TREE */
6747#endif /* ONIG_DEBUG */
Note: See TracBrowser for help on using the repository browser.