1 | /**********************************************************************
|
---|
2 | regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
|
---|
3 | **********************************************************************/
|
---|
4 | /*-
|
---|
5 | * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
---|
6 | * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
---|
7 | * All rights reserved.
|
---|
8 | *
|
---|
9 | * Redistribution and use in source and binary forms, with or without
|
---|
10 | * modification, are permitted provided that the following conditions
|
---|
11 | * are met:
|
---|
12 | * 1. Redistributions of source code must retain the above copyright
|
---|
13 | * notice, this list of conditions and the following disclaimer.
|
---|
14 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
15 | * notice, this list of conditions and the following disclaimer in the
|
---|
16 | * documentation and/or other materials provided with the distribution.
|
---|
17 | *
|
---|
18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
---|
19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
---|
21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
---|
22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
---|
23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
---|
24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
---|
25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
---|
26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
---|
27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
28 | * SUCH DAMAGE.
|
---|
29 | */
|
---|
30 |
|
---|
31 | #include "regparse.h"
|
---|
32 |
|
---|
33 | OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
|
---|
34 |
|
---|
35 | extern OnigCaseFoldType
|
---|
36 | onig_get_default_case_fold_flag(void)
|
---|
37 | {
|
---|
38 | return OnigDefaultCaseFoldFlag;
|
---|
39 | }
|
---|
40 |
|
---|
41 | extern int
|
---|
42 | onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
|
---|
43 | {
|
---|
44 | OnigDefaultCaseFoldFlag = case_fold_flag;
|
---|
45 | return 0;
|
---|
46 | }
|
---|
47 |
|
---|
48 |
|
---|
49 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
50 | static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
---|
51 | #endif
|
---|
52 |
|
---|
53 | #if 0
|
---|
54 | static UChar*
|
---|
55 | str_dup(UChar* s, UChar* end)
|
---|
56 | {
|
---|
57 | ptrdiff_t len = end - s;
|
---|
58 |
|
---|
59 | if (len > 0) {
|
---|
60 | UChar* r = (UChar* )xmalloc(len + 1);
|
---|
61 | CHECK_NULL_RETURN(r);
|
---|
62 | xmemcpy(r, s, len);
|
---|
63 | r[len] = (UChar )0;
|
---|
64 | return r;
|
---|
65 | }
|
---|
66 | else return NULL;
|
---|
67 | }
|
---|
68 | #endif
|
---|
69 |
|
---|
70 | static void
|
---|
71 | swap_node(Node* a, Node* b)
|
---|
72 | {
|
---|
73 | Node c;
|
---|
74 | c = *a; *a = *b; *b = c;
|
---|
75 |
|
---|
76 | if (NTYPE(a) == NT_STR) {
|
---|
77 | StrNode* sn = NSTR(a);
|
---|
78 | if (sn->capa == 0) {
|
---|
79 | size_t len = sn->end - sn->s;
|
---|
80 | sn->s = sn->buf;
|
---|
81 | sn->end = sn->s + len;
|
---|
82 | }
|
---|
83 | }
|
---|
84 |
|
---|
85 | if (NTYPE(b) == NT_STR) {
|
---|
86 | StrNode* sn = NSTR(b);
|
---|
87 | if (sn->capa == 0) {
|
---|
88 | size_t len = sn->end - sn->s;
|
---|
89 | sn->s = sn->buf;
|
---|
90 | sn->end = sn->s + len;
|
---|
91 | }
|
---|
92 | }
|
---|
93 | }
|
---|
94 |
|
---|
95 | static OnigDistance
|
---|
96 | distance_add(OnigDistance d1, OnigDistance d2)
|
---|
97 | {
|
---|
98 | if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
|
---|
99 | return ONIG_INFINITE_DISTANCE;
|
---|
100 | else {
|
---|
101 | if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
|
---|
102 | else return ONIG_INFINITE_DISTANCE;
|
---|
103 | }
|
---|
104 | }
|
---|
105 |
|
---|
106 | static OnigDistance
|
---|
107 | distance_multiply(OnigDistance d, int m)
|
---|
108 | {
|
---|
109 | if (m == 0) return 0;
|
---|
110 |
|
---|
111 | if (d < ONIG_INFINITE_DISTANCE / m)
|
---|
112 | return d * m;
|
---|
113 | else
|
---|
114 | return ONIG_INFINITE_DISTANCE;
|
---|
115 | }
|
---|
116 |
|
---|
117 | static int
|
---|
118 | bitset_is_empty(BitSetRef bs)
|
---|
119 | {
|
---|
120 | int i;
|
---|
121 | for (i = 0; i < BITSET_SIZE; i++) {
|
---|
122 | if (bs[i] != 0) return 0;
|
---|
123 | }
|
---|
124 | return 1;
|
---|
125 | }
|
---|
126 |
|
---|
127 | #ifdef ONIG_DEBUG
|
---|
128 | static int
|
---|
129 | bitset_on_num(BitSetRef bs)
|
---|
130 | {
|
---|
131 | int i, n;
|
---|
132 |
|
---|
133 | n = 0;
|
---|
134 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
135 | if (BITSET_AT(bs, i)) n++;
|
---|
136 | }
|
---|
137 | return n;
|
---|
138 | }
|
---|
139 | #endif
|
---|
140 |
|
---|
141 | extern int
|
---|
142 | onig_bbuf_init(BBuf* buf, OnigDistance size)
|
---|
143 | {
|
---|
144 | if (size <= 0) {
|
---|
145 | size = 0;
|
---|
146 | buf->p = NULL;
|
---|
147 | }
|
---|
148 | else {
|
---|
149 | buf->p = (UChar* )xmalloc(size);
|
---|
150 | if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
|
---|
151 | }
|
---|
152 |
|
---|
153 | buf->alloc = (unsigned int )size;
|
---|
154 | buf->used = 0;
|
---|
155 | return 0;
|
---|
156 | }
|
---|
157 |
|
---|
158 |
|
---|
159 | #ifdef USE_SUBEXP_CALL
|
---|
160 |
|
---|
161 | static int
|
---|
162 | unset_addr_list_init(UnsetAddrList* uslist, int size)
|
---|
163 | {
|
---|
164 | UnsetAddr* p;
|
---|
165 |
|
---|
166 | p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
|
---|
167 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
168 | uslist->num = 0;
|
---|
169 | uslist->alloc = size;
|
---|
170 | uslist->us = p;
|
---|
171 | return 0;
|
---|
172 | }
|
---|
173 |
|
---|
174 | static void
|
---|
175 | unset_addr_list_end(UnsetAddrList* uslist)
|
---|
176 | {
|
---|
177 | if (IS_NOT_NULL(uslist->us))
|
---|
178 | xfree(uslist->us);
|
---|
179 | }
|
---|
180 |
|
---|
181 | static int
|
---|
182 | unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
|
---|
183 | {
|
---|
184 | UnsetAddr* p;
|
---|
185 | int size;
|
---|
186 |
|
---|
187 | if (uslist->num >= uslist->alloc) {
|
---|
188 | size = uslist->alloc * 2;
|
---|
189 | p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
|
---|
190 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
191 | uslist->alloc = size;
|
---|
192 | uslist->us = p;
|
---|
193 | }
|
---|
194 |
|
---|
195 | uslist->us[uslist->num].offset = offset;
|
---|
196 | uslist->us[uslist->num].target = node;
|
---|
197 | uslist->num++;
|
---|
198 | return 0;
|
---|
199 | }
|
---|
200 | #endif /* USE_SUBEXP_CALL */
|
---|
201 |
|
---|
202 |
|
---|
203 | static int
|
---|
204 | add_opcode(regex_t* reg, int opcode)
|
---|
205 | {
|
---|
206 | BBUF_ADD1(reg, opcode);
|
---|
207 | return 0;
|
---|
208 | }
|
---|
209 |
|
---|
210 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
211 | static int
|
---|
212 | add_state_check_num(regex_t* reg, int num)
|
---|
213 | {
|
---|
214 | StateCheckNumType n = (StateCheckNumType )num;
|
---|
215 |
|
---|
216 | BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
|
---|
217 | return 0;
|
---|
218 | }
|
---|
219 | #endif
|
---|
220 |
|
---|
221 | static int
|
---|
222 | add_rel_addr(regex_t* reg, int addr)
|
---|
223 | {
|
---|
224 | RelAddrType ra = (RelAddrType )addr;
|
---|
225 |
|
---|
226 | BBUF_ADD(reg, &ra, SIZE_RELADDR);
|
---|
227 | return 0;
|
---|
228 | }
|
---|
229 |
|
---|
230 | static int
|
---|
231 | add_abs_addr(regex_t* reg, int addr)
|
---|
232 | {
|
---|
233 | AbsAddrType ra = (AbsAddrType )addr;
|
---|
234 |
|
---|
235 | BBUF_ADD(reg, &ra, SIZE_ABSADDR);
|
---|
236 | return 0;
|
---|
237 | }
|
---|
238 |
|
---|
239 | static int
|
---|
240 | add_length(regex_t* reg, OnigDistance len)
|
---|
241 | {
|
---|
242 | LengthType l = (LengthType )len;
|
---|
243 |
|
---|
244 | BBUF_ADD(reg, &l, SIZE_LENGTH);
|
---|
245 | return 0;
|
---|
246 | }
|
---|
247 |
|
---|
248 | static int
|
---|
249 | add_mem_num(regex_t* reg, int num)
|
---|
250 | {
|
---|
251 | MemNumType n = (MemNumType )num;
|
---|
252 |
|
---|
253 | BBUF_ADD(reg, &n, SIZE_MEMNUM);
|
---|
254 | return 0;
|
---|
255 | }
|
---|
256 |
|
---|
257 | #if 0
|
---|
258 | static int
|
---|
259 | add_pointer(regex_t* reg, void* addr)
|
---|
260 | {
|
---|
261 | PointerType ptr = (PointerType )addr;
|
---|
262 |
|
---|
263 | BBUF_ADD(reg, &ptr, SIZE_POINTER);
|
---|
264 | return 0;
|
---|
265 | }
|
---|
266 | #endif
|
---|
267 |
|
---|
268 | static int
|
---|
269 | add_option(regex_t* reg, OnigOptionType option)
|
---|
270 | {
|
---|
271 | BBUF_ADD(reg, &option, SIZE_OPTION);
|
---|
272 | return 0;
|
---|
273 | }
|
---|
274 |
|
---|
275 | static int
|
---|
276 | add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
|
---|
277 | {
|
---|
278 | int r;
|
---|
279 |
|
---|
280 | r = add_opcode(reg, opcode);
|
---|
281 | if (r) return r;
|
---|
282 | r = add_rel_addr(reg, addr);
|
---|
283 | return r;
|
---|
284 | }
|
---|
285 |
|
---|
286 | static int
|
---|
287 | add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
|
---|
288 | {
|
---|
289 | BBUF_ADD(reg, bytes, len);
|
---|
290 | return 0;
|
---|
291 | }
|
---|
292 |
|
---|
293 | static int
|
---|
294 | add_bitset(regex_t* reg, BitSetRef bs)
|
---|
295 | {
|
---|
296 | BBUF_ADD(reg, bs, SIZE_BITSET);
|
---|
297 | return 0;
|
---|
298 | }
|
---|
299 |
|
---|
300 | static int
|
---|
301 | add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
|
---|
302 | {
|
---|
303 | int r;
|
---|
304 |
|
---|
305 | r = add_opcode(reg, opcode);
|
---|
306 | if (r) return r;
|
---|
307 | r = add_option(reg, option);
|
---|
308 | return r;
|
---|
309 | }
|
---|
310 |
|
---|
311 | static int compile_length_tree(Node* node, regex_t* reg);
|
---|
312 | static int compile_tree(Node* node, regex_t* reg);
|
---|
313 |
|
---|
314 |
|
---|
315 | #define IS_NEED_STR_LEN_OP_EXACT(op) \
|
---|
316 | ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
|
---|
317 | (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
|
---|
318 |
|
---|
319 | static int
|
---|
320 | select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
|
---|
321 | {
|
---|
322 | int op;
|
---|
323 | OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
|
---|
324 |
|
---|
325 | if (ignore_case) {
|
---|
326 | switch (str_len) {
|
---|
327 | case 1: op = OP_EXACT1_IC; break;
|
---|
328 | default: op = OP_EXACTN_IC; break;
|
---|
329 | }
|
---|
330 | }
|
---|
331 | else {
|
---|
332 | switch (mb_len) {
|
---|
333 | case 1:
|
---|
334 | switch (str_len) {
|
---|
335 | case 1: op = OP_EXACT1; break;
|
---|
336 | case 2: op = OP_EXACT2; break;
|
---|
337 | case 3: op = OP_EXACT3; break;
|
---|
338 | case 4: op = OP_EXACT4; break;
|
---|
339 | case 5: op = OP_EXACT5; break;
|
---|
340 | default: op = OP_EXACTN; break;
|
---|
341 | }
|
---|
342 | break;
|
---|
343 |
|
---|
344 | case 2:
|
---|
345 | switch (str_len) {
|
---|
346 | case 1: op = OP_EXACTMB2N1; break;
|
---|
347 | case 2: op = OP_EXACTMB2N2; break;
|
---|
348 | case 3: op = OP_EXACTMB2N3; break;
|
---|
349 | default: op = OP_EXACTMB2N; break;
|
---|
350 | }
|
---|
351 | break;
|
---|
352 |
|
---|
353 | case 3:
|
---|
354 | op = OP_EXACTMB3N;
|
---|
355 | break;
|
---|
356 |
|
---|
357 | default:
|
---|
358 | op = OP_EXACTMBN;
|
---|
359 | break;
|
---|
360 | }
|
---|
361 | }
|
---|
362 | return op;
|
---|
363 | }
|
---|
364 |
|
---|
365 | static int
|
---|
366 | compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
|
---|
367 | {
|
---|
368 | int r;
|
---|
369 | int saved_num_null_check = reg->num_null_check;
|
---|
370 |
|
---|
371 | if (empty_info != 0) {
|
---|
372 | r = add_opcode(reg, OP_NULL_CHECK_START);
|
---|
373 | if (r) return r;
|
---|
374 | r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
|
---|
375 | if (r) return r;
|
---|
376 | reg->num_null_check++;
|
---|
377 | }
|
---|
378 |
|
---|
379 | r = compile_tree(node, reg);
|
---|
380 | if (r) return r;
|
---|
381 |
|
---|
382 | if (empty_info != 0) {
|
---|
383 | if (empty_info == NQ_TARGET_IS_EMPTY)
|
---|
384 | r = add_opcode(reg, OP_NULL_CHECK_END);
|
---|
385 | else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
|
---|
386 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
|
---|
387 | else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
|
---|
388 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
|
---|
389 |
|
---|
390 | if (r) return r;
|
---|
391 | r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
|
---|
392 | }
|
---|
393 | return r;
|
---|
394 | }
|
---|
395 |
|
---|
396 | #ifdef USE_SUBEXP_CALL
|
---|
397 | static int
|
---|
398 | compile_call(CallNode* node, regex_t* reg)
|
---|
399 | {
|
---|
400 | int r;
|
---|
401 |
|
---|
402 | r = add_opcode(reg, OP_CALL);
|
---|
403 | if (r) return r;
|
---|
404 | r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
|
---|
405 | node->target);
|
---|
406 | if (r) return r;
|
---|
407 | r = add_abs_addr(reg, 0 /*dummy addr.*/);
|
---|
408 | return r;
|
---|
409 | }
|
---|
410 | #endif
|
---|
411 |
|
---|
412 | static int
|
---|
413 | compile_tree_n_times(Node* node, int n, regex_t* reg)
|
---|
414 | {
|
---|
415 | int i, r;
|
---|
416 |
|
---|
417 | for (i = 0; i < n; i++) {
|
---|
418 | r = compile_tree(node, reg);
|
---|
419 | if (r) return r;
|
---|
420 | }
|
---|
421 | return 0;
|
---|
422 | }
|
---|
423 |
|
---|
424 | static int
|
---|
425 | add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
|
---|
426 | regex_t* reg ARG_UNUSED, int ignore_case)
|
---|
427 | {
|
---|
428 | int len;
|
---|
429 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
430 |
|
---|
431 | len = SIZE_OPCODE;
|
---|
432 |
|
---|
433 | if (op == OP_EXACTMBN) len += SIZE_LENGTH;
|
---|
434 | if (IS_NEED_STR_LEN_OP_EXACT(op))
|
---|
435 | len += SIZE_LENGTH;
|
---|
436 |
|
---|
437 | len += (int )byte_len;
|
---|
438 | return len;
|
---|
439 | }
|
---|
440 |
|
---|
441 | static int
|
---|
442 | add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
|
---|
443 | regex_t* reg, int ignore_case)
|
---|
444 | {
|
---|
445 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
446 | add_opcode(reg, op);
|
---|
447 |
|
---|
448 | if (op == OP_EXACTMBN)
|
---|
449 | add_length(reg, mb_len);
|
---|
450 |
|
---|
451 | if (IS_NEED_STR_LEN_OP_EXACT(op)) {
|
---|
452 | if (op == OP_EXACTN_IC)
|
---|
453 | add_length(reg, byte_len);
|
---|
454 | else
|
---|
455 | add_length(reg, byte_len / mb_len);
|
---|
456 | }
|
---|
457 |
|
---|
458 | add_bytes(reg, s, byte_len);
|
---|
459 | return 0;
|
---|
460 | }
|
---|
461 |
|
---|
462 |
|
---|
463 | static int
|
---|
464 | compile_length_string_node(Node* node, regex_t* reg)
|
---|
465 | {
|
---|
466 | int rlen, r, len, prev_len, blen, ambig;
|
---|
467 | OnigEncoding enc = reg->enc;
|
---|
468 | UChar *p, *prev;
|
---|
469 | StrNode* sn;
|
---|
470 |
|
---|
471 | sn = NSTR(node);
|
---|
472 | if (sn->end <= sn->s)
|
---|
473 | return 0;
|
---|
474 |
|
---|
475 | ambig = NSTRING_IS_AMBIG(node);
|
---|
476 |
|
---|
477 | p = prev = sn->s;
|
---|
478 | prev_len = enclen(enc, p, sn->end);
|
---|
479 | p += prev_len;
|
---|
480 | blen = prev_len;
|
---|
481 | rlen = 0;
|
---|
482 |
|
---|
483 | for (; p < sn->end; ) {
|
---|
484 | len = enclen(enc, p, sn->end);
|
---|
485 | if (len == prev_len || ambig) {
|
---|
486 | blen += len;
|
---|
487 | }
|
---|
488 | else {
|
---|
489 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
490 | rlen += r;
|
---|
491 | prev = p;
|
---|
492 | blen = len;
|
---|
493 | prev_len = len;
|
---|
494 | }
|
---|
495 | p += len;
|
---|
496 | }
|
---|
497 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
498 | rlen += r;
|
---|
499 | return rlen;
|
---|
500 | }
|
---|
501 |
|
---|
502 | static int
|
---|
503 | compile_length_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
504 | {
|
---|
505 | if (sn->end <= sn->s)
|
---|
506 | return 0;
|
---|
507 |
|
---|
508 | return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
509 | }
|
---|
510 |
|
---|
511 | static int
|
---|
512 | compile_string_node(Node* node, regex_t* reg)
|
---|
513 | {
|
---|
514 | int r, len, prev_len, blen, ambig;
|
---|
515 | OnigEncoding enc = reg->enc;
|
---|
516 | UChar *p, *prev, *end;
|
---|
517 | StrNode* sn;
|
---|
518 |
|
---|
519 | sn = NSTR(node);
|
---|
520 | if (sn->end <= sn->s)
|
---|
521 | return 0;
|
---|
522 |
|
---|
523 | end = sn->end;
|
---|
524 | ambig = NSTRING_IS_AMBIG(node);
|
---|
525 |
|
---|
526 | p = prev = sn->s;
|
---|
527 | prev_len = enclen(enc, p, end);
|
---|
528 | p += prev_len;
|
---|
529 | blen = prev_len;
|
---|
530 |
|
---|
531 | for (; p < end; ) {
|
---|
532 | len = enclen(enc, p, end);
|
---|
533 | if (len == prev_len || ambig) {
|
---|
534 | blen += len;
|
---|
535 | }
|
---|
536 | else {
|
---|
537 | r = add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
538 | if (r) return r;
|
---|
539 |
|
---|
540 | prev = p;
|
---|
541 | blen = len;
|
---|
542 | prev_len = len;
|
---|
543 | }
|
---|
544 |
|
---|
545 | p += len;
|
---|
546 | }
|
---|
547 | return add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
548 | }
|
---|
549 |
|
---|
550 | static int
|
---|
551 | compile_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
552 | {
|
---|
553 | if (sn->end <= sn->s)
|
---|
554 | return 0;
|
---|
555 |
|
---|
556 | return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
557 | }
|
---|
558 |
|
---|
559 | static int
|
---|
560 | add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
|
---|
561 | {
|
---|
562 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
563 | add_length(reg, mbuf->used);
|
---|
564 | return add_bytes(reg, mbuf->p, mbuf->used);
|
---|
565 | #else
|
---|
566 | int r, pad_size;
|
---|
567 | UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
|
---|
568 |
|
---|
569 | GET_ALIGNMENT_PAD_SIZE(p, pad_size);
|
---|
570 | add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
|
---|
571 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
572 |
|
---|
573 | r = add_bytes(reg, mbuf->p, mbuf->used);
|
---|
574 |
|
---|
575 | /* padding for return value from compile_length_cclass_node() to be fix. */
|
---|
576 | pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
|
---|
577 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
578 | return r;
|
---|
579 | #endif
|
---|
580 | }
|
---|
581 |
|
---|
582 | static int
|
---|
583 | compile_length_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
584 | {
|
---|
585 | int len;
|
---|
586 |
|
---|
587 | if (IS_NULL(cc->mbuf)) {
|
---|
588 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
589 | }
|
---|
590 | else {
|
---|
591 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
592 | len = SIZE_OPCODE;
|
---|
593 | }
|
---|
594 | else {
|
---|
595 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
596 | }
|
---|
597 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
598 | len += SIZE_LENGTH + cc->mbuf->used;
|
---|
599 | #else
|
---|
600 | len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
|
---|
601 | #endif
|
---|
602 | }
|
---|
603 |
|
---|
604 | return len;
|
---|
605 | }
|
---|
606 |
|
---|
607 | static int
|
---|
608 | compile_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
609 | {
|
---|
610 | int r;
|
---|
611 |
|
---|
612 | if (IS_NULL(cc->mbuf)) {
|
---|
613 | if (IS_NCCLASS_NOT(cc))
|
---|
614 | add_opcode(reg, OP_CCLASS_NOT);
|
---|
615 | else
|
---|
616 | add_opcode(reg, OP_CCLASS);
|
---|
617 |
|
---|
618 | r = add_bitset(reg, cc->bs);
|
---|
619 | }
|
---|
620 | else {
|
---|
621 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
622 | if (IS_NCCLASS_NOT(cc))
|
---|
623 | add_opcode(reg, OP_CCLASS_MB_NOT);
|
---|
624 | else
|
---|
625 | add_opcode(reg, OP_CCLASS_MB);
|
---|
626 |
|
---|
627 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
628 | }
|
---|
629 | else {
|
---|
630 | if (IS_NCCLASS_NOT(cc))
|
---|
631 | add_opcode(reg, OP_CCLASS_MIX_NOT);
|
---|
632 | else
|
---|
633 | add_opcode(reg, OP_CCLASS_MIX);
|
---|
634 |
|
---|
635 | r = add_bitset(reg, cc->bs);
|
---|
636 | if (r) return r;
|
---|
637 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
638 | }
|
---|
639 | }
|
---|
640 |
|
---|
641 | return r;
|
---|
642 | }
|
---|
643 |
|
---|
644 | static int
|
---|
645 | entry_repeat_range(regex_t* reg, int id, int lower, int upper)
|
---|
646 | {
|
---|
647 | #define REPEAT_RANGE_ALLOC 4
|
---|
648 |
|
---|
649 | OnigRepeatRange* p;
|
---|
650 |
|
---|
651 | if (reg->repeat_range_alloc == 0) {
|
---|
652 | p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
|
---|
653 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
654 | reg->repeat_range = p;
|
---|
655 | reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
|
---|
656 | }
|
---|
657 | else if (reg->repeat_range_alloc <= id) {
|
---|
658 | int n;
|
---|
659 | n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
|
---|
660 | p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
|
---|
661 | sizeof(OnigRepeatRange) * n);
|
---|
662 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
663 | reg->repeat_range = p;
|
---|
664 | reg->repeat_range_alloc = n;
|
---|
665 | }
|
---|
666 | else {
|
---|
667 | p = reg->repeat_range;
|
---|
668 | }
|
---|
669 |
|
---|
670 | p[id].lower = lower;
|
---|
671 | p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
|
---|
672 | return 0;
|
---|
673 | }
|
---|
674 |
|
---|
675 | static int
|
---|
676 | compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
|
---|
677 | regex_t* reg)
|
---|
678 | {
|
---|
679 | int r;
|
---|
680 | int num_repeat = reg->num_repeat;
|
---|
681 |
|
---|
682 | r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
|
---|
683 | if (r) return r;
|
---|
684 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
685 | reg->num_repeat++;
|
---|
686 | if (r) return r;
|
---|
687 | r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
|
---|
688 | if (r) return r;
|
---|
689 |
|
---|
690 | r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
|
---|
691 | if (r) return r;
|
---|
692 |
|
---|
693 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
694 | if (r) return r;
|
---|
695 |
|
---|
696 | if (
|
---|
697 | #ifdef USE_SUBEXP_CALL
|
---|
698 | reg->num_call > 0 ||
|
---|
699 | #endif
|
---|
700 | IS_QUANTIFIER_IN_REPEAT(qn)) {
|
---|
701 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
|
---|
702 | }
|
---|
703 | else {
|
---|
704 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
|
---|
705 | }
|
---|
706 | if (r) return r;
|
---|
707 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
708 | return r;
|
---|
709 | }
|
---|
710 |
|
---|
711 | static int
|
---|
712 | is_anychar_star_quantifier(QtfrNode* qn)
|
---|
713 | {
|
---|
714 | if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
---|
715 | NTYPE(qn->target) == NT_CANY)
|
---|
716 | return 1;
|
---|
717 | else
|
---|
718 | return 0;
|
---|
719 | }
|
---|
720 |
|
---|
721 | #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
|
---|
722 | #define CKN_ON (ckn > 0)
|
---|
723 |
|
---|
724 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
725 |
|
---|
726 | static int
|
---|
727 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
728 | {
|
---|
729 | int len, mod_tlen, cklen;
|
---|
730 | int ckn;
|
---|
731 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
732 | int empty_info = qn->target_empty_info;
|
---|
733 | int tlen = compile_length_tree(qn->target, reg);
|
---|
734 |
|
---|
735 | if (tlen < 0) return tlen;
|
---|
736 |
|
---|
737 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
738 |
|
---|
739 | cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
|
---|
740 |
|
---|
741 | /* anychar repeat */
|
---|
742 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
743 | if (qn->greedy && infinite) {
|
---|
744 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
|
---|
745 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
|
---|
746 | else
|
---|
747 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
|
---|
748 | }
|
---|
749 | }
|
---|
750 |
|
---|
751 | if (empty_info != 0)
|
---|
752 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
753 | else
|
---|
754 | mod_tlen = tlen;
|
---|
755 |
|
---|
756 | if (infinite && qn->lower <= 1) {
|
---|
757 | if (qn->greedy) {
|
---|
758 | if (qn->lower == 1)
|
---|
759 | len = SIZE_OP_JUMP;
|
---|
760 | else
|
---|
761 | len = 0;
|
---|
762 |
|
---|
763 | len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
|
---|
764 | }
|
---|
765 | else {
|
---|
766 | if (qn->lower == 0)
|
---|
767 | len = SIZE_OP_JUMP;
|
---|
768 | else
|
---|
769 | len = 0;
|
---|
770 |
|
---|
771 | len += mod_tlen + SIZE_OP_PUSH + cklen;
|
---|
772 | }
|
---|
773 | }
|
---|
774 | else if (qn->upper == 0) {
|
---|
775 | if (qn->is_refered != 0) /* /(?<n>..){0}/ */
|
---|
776 | len = SIZE_OP_JUMP + tlen;
|
---|
777 | else
|
---|
778 | len = 0;
|
---|
779 | }
|
---|
780 | else if (qn->upper == 1 && qn->greedy) {
|
---|
781 | if (qn->lower == 0) {
|
---|
782 | if (CKN_ON) {
|
---|
783 | len = SIZE_OP_STATE_CHECK_PUSH + tlen;
|
---|
784 | }
|
---|
785 | else {
|
---|
786 | len = SIZE_OP_PUSH + tlen;
|
---|
787 | }
|
---|
788 | }
|
---|
789 | else {
|
---|
790 | len = tlen;
|
---|
791 | }
|
---|
792 | }
|
---|
793 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
794 | len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
|
---|
795 | }
|
---|
796 | else {
|
---|
797 | len = SIZE_OP_REPEAT_INC
|
---|
798 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
799 | if (CKN_ON)
|
---|
800 | len += SIZE_OP_STATE_CHECK;
|
---|
801 | }
|
---|
802 |
|
---|
803 | return len;
|
---|
804 | }
|
---|
805 |
|
---|
806 | static int
|
---|
807 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
808 | {
|
---|
809 | int r, mod_tlen;
|
---|
810 | int ckn;
|
---|
811 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
812 | int empty_info = qn->target_empty_info;
|
---|
813 | int tlen = compile_length_tree(qn->target, reg);
|
---|
814 |
|
---|
815 | if (tlen < 0) return tlen;
|
---|
816 |
|
---|
817 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
818 |
|
---|
819 | if (is_anychar_star_quantifier(qn)) {
|
---|
820 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
821 | if (r) return r;
|
---|
822 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
|
---|
823 | if (IS_MULTILINE(reg->options))
|
---|
824 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
825 | else
|
---|
826 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
827 | if (r) return r;
|
---|
828 | if (CKN_ON) {
|
---|
829 | r = add_state_check_num(reg, ckn);
|
---|
830 | if (r) return r;
|
---|
831 | }
|
---|
832 |
|
---|
833 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
834 | }
|
---|
835 | else {
|
---|
836 | if (IS_MULTILINE(reg->options)) {
|
---|
837 | r = add_opcode(reg, (CKN_ON ?
|
---|
838 | OP_STATE_CHECK_ANYCHAR_ML_STAR
|
---|
839 | : OP_ANYCHAR_ML_STAR));
|
---|
840 | }
|
---|
841 | else {
|
---|
842 | r = add_opcode(reg, (CKN_ON ?
|
---|
843 | OP_STATE_CHECK_ANYCHAR_STAR
|
---|
844 | : OP_ANYCHAR_STAR));
|
---|
845 | }
|
---|
846 | if (r) return r;
|
---|
847 | if (CKN_ON)
|
---|
848 | r = add_state_check_num(reg, ckn);
|
---|
849 |
|
---|
850 | return r;
|
---|
851 | }
|
---|
852 | }
|
---|
853 |
|
---|
854 | if (empty_info != 0)
|
---|
855 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
856 | else
|
---|
857 | mod_tlen = tlen;
|
---|
858 |
|
---|
859 | if (infinite && qn->lower <= 1) {
|
---|
860 | if (qn->greedy) {
|
---|
861 | if (qn->lower == 1) {
|
---|
862 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
863 | (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
|
---|
864 | if (r) return r;
|
---|
865 | }
|
---|
866 |
|
---|
867 | if (CKN_ON) {
|
---|
868 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
869 | if (r) return r;
|
---|
870 | r = add_state_check_num(reg, ckn);
|
---|
871 | if (r) return r;
|
---|
872 | r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
|
---|
873 | }
|
---|
874 | else {
|
---|
875 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
876 | }
|
---|
877 | if (r) return r;
|
---|
878 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
879 | if (r) return r;
|
---|
880 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
881 | -(mod_tlen + (int )SIZE_OP_JUMP
|
---|
882 | + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
|
---|
883 | }
|
---|
884 | else {
|
---|
885 | if (qn->lower == 0) {
|
---|
886 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
887 | if (r) return r;
|
---|
888 | }
|
---|
889 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
890 | if (r) return r;
|
---|
891 | if (CKN_ON) {
|
---|
892 | r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
|
---|
893 | if (r) return r;
|
---|
894 | r = add_state_check_num(reg, ckn);
|
---|
895 | if (r) return r;
|
---|
896 | r = add_rel_addr(reg,
|
---|
897 | -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
|
---|
898 | }
|
---|
899 | else
|
---|
900 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
901 | }
|
---|
902 | }
|
---|
903 | else if (qn->upper == 0) {
|
---|
904 | if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
905 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
906 | if (r) return r;
|
---|
907 | r = compile_tree(qn->target, reg);
|
---|
908 | }
|
---|
909 | else
|
---|
910 | r = 0;
|
---|
911 | }
|
---|
912 | else if (qn->upper == 1 && qn->greedy) {
|
---|
913 | if (qn->lower == 0) {
|
---|
914 | if (CKN_ON) {
|
---|
915 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
916 | if (r) return r;
|
---|
917 | r = add_state_check_num(reg, ckn);
|
---|
918 | if (r) return r;
|
---|
919 | r = add_rel_addr(reg, tlen);
|
---|
920 | }
|
---|
921 | else {
|
---|
922 | r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
|
---|
923 | }
|
---|
924 | if (r) return r;
|
---|
925 | }
|
---|
926 |
|
---|
927 | r = compile_tree(qn->target, reg);
|
---|
928 | }
|
---|
929 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
930 | if (CKN_ON) {
|
---|
931 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
932 | if (r) return r;
|
---|
933 | r = add_state_check_num(reg, ckn);
|
---|
934 | if (r) return r;
|
---|
935 | r = add_rel_addr(reg, SIZE_OP_JUMP);
|
---|
936 | }
|
---|
937 | else {
|
---|
938 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
939 | }
|
---|
940 |
|
---|
941 | if (r) return r;
|
---|
942 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
943 | if (r) return r;
|
---|
944 | r = compile_tree(qn->target, reg);
|
---|
945 | }
|
---|
946 | else {
|
---|
947 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
948 | if (CKN_ON) {
|
---|
949 | if (r) return r;
|
---|
950 | r = add_opcode(reg, OP_STATE_CHECK);
|
---|
951 | if (r) return r;
|
---|
952 | r = add_state_check_num(reg, ckn);
|
---|
953 | }
|
---|
954 | }
|
---|
955 | return r;
|
---|
956 | }
|
---|
957 |
|
---|
958 | #else /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
959 |
|
---|
960 | static int
|
---|
961 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
962 | {
|
---|
963 | int len, mod_tlen;
|
---|
964 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
965 | int empty_info = qn->target_empty_info;
|
---|
966 | int tlen = compile_length_tree(qn->target, reg);
|
---|
967 |
|
---|
968 | if (tlen < 0) return tlen;
|
---|
969 |
|
---|
970 | /* anychar repeat */
|
---|
971 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
972 | if (qn->greedy && infinite) {
|
---|
973 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
974 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
|
---|
975 | else
|
---|
976 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
|
---|
977 | }
|
---|
978 | }
|
---|
979 |
|
---|
980 | if (empty_info != 0)
|
---|
981 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
982 | else
|
---|
983 | mod_tlen = tlen;
|
---|
984 |
|
---|
985 | if (infinite &&
|
---|
986 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
987 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
988 | len = SIZE_OP_JUMP;
|
---|
989 | }
|
---|
990 | else {
|
---|
991 | len = tlen * qn->lower;
|
---|
992 | }
|
---|
993 |
|
---|
994 | if (qn->greedy) {
|
---|
995 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
996 | if (IS_NOT_NULL(qn->head_exact))
|
---|
997 | len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
|
---|
998 | else
|
---|
999 | #endif
|
---|
1000 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
1001 | len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
|
---|
1002 | else
|
---|
1003 | len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
|
---|
1004 | }
|
---|
1005 | else
|
---|
1006 | len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
|
---|
1007 | }
|
---|
1008 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
1009 | len = SIZE_OP_JUMP + tlen;
|
---|
1010 | }
|
---|
1011 | else if (!infinite && qn->greedy &&
|
---|
1012 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
1013 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
1014 | len = tlen * qn->lower;
|
---|
1015 | len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
|
---|
1016 | }
|
---|
1017 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
1018 | len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
|
---|
1019 | }
|
---|
1020 | else {
|
---|
1021 | len = SIZE_OP_REPEAT_INC
|
---|
1022 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
1023 | }
|
---|
1024 |
|
---|
1025 | return len;
|
---|
1026 | }
|
---|
1027 |
|
---|
1028 | static int
|
---|
1029 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
1030 | {
|
---|
1031 | int i, r, mod_tlen;
|
---|
1032 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
1033 | int empty_info = qn->target_empty_info;
|
---|
1034 | int tlen = compile_length_tree(qn->target, reg);
|
---|
1035 |
|
---|
1036 | if (tlen < 0) return tlen;
|
---|
1037 |
|
---|
1038 | if (is_anychar_star_quantifier(qn)) {
|
---|
1039 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1040 | if (r) return r;
|
---|
1041 | if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
1042 | if (IS_MULTILINE(reg->options))
|
---|
1043 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
1044 | else
|
---|
1045 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
1046 | if (r) return r;
|
---|
1047 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
1048 | }
|
---|
1049 | else {
|
---|
1050 | if (IS_MULTILINE(reg->options))
|
---|
1051 | return add_opcode(reg, OP_ANYCHAR_ML_STAR);
|
---|
1052 | else
|
---|
1053 | return add_opcode(reg, OP_ANYCHAR_STAR);
|
---|
1054 | }
|
---|
1055 | }
|
---|
1056 |
|
---|
1057 | if (empty_info != 0)
|
---|
1058 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
1059 | else
|
---|
1060 | mod_tlen = tlen;
|
---|
1061 |
|
---|
1062 | if (infinite &&
|
---|
1063 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
1064 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
1065 | if (qn->greedy) {
|
---|
1066 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
1067 | if (IS_NOT_NULL(qn->head_exact))
|
---|
1068 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
|
---|
1069 | else
|
---|
1070 | #endif
|
---|
1071 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
1072 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
|
---|
1073 | else
|
---|
1074 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
|
---|
1075 | }
|
---|
1076 | else {
|
---|
1077 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
|
---|
1078 | }
|
---|
1079 | if (r) return r;
|
---|
1080 | }
|
---|
1081 | else {
|
---|
1082 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1083 | if (r) return r;
|
---|
1084 | }
|
---|
1085 |
|
---|
1086 | if (qn->greedy) {
|
---|
1087 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
1088 | if (IS_NOT_NULL(qn->head_exact)) {
|
---|
1089 | r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
|
---|
1090 | mod_tlen + SIZE_OP_JUMP);
|
---|
1091 | if (r) return r;
|
---|
1092 | add_bytes(reg, NSTR(qn->head_exact)->s, 1);
|
---|
1093 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1094 | if (r) return r;
|
---|
1095 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1096 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
|
---|
1097 | }
|
---|
1098 | else
|
---|
1099 | #endif
|
---|
1100 | if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
1101 | r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
|
---|
1102 | mod_tlen + SIZE_OP_JUMP);
|
---|
1103 | if (r) return r;
|
---|
1104 | add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
1105 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1106 | if (r) return r;
|
---|
1107 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1108 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
|
---|
1109 | }
|
---|
1110 | else {
|
---|
1111 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
1112 | if (r) return r;
|
---|
1113 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1114 | if (r) return r;
|
---|
1115 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1116 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
|
---|
1117 | }
|
---|
1118 | }
|
---|
1119 | else {
|
---|
1120 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
1121 | if (r) return r;
|
---|
1122 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1123 | if (r) return r;
|
---|
1124 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
1125 | }
|
---|
1126 | }
|
---|
1127 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
1128 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
1129 | if (r) return r;
|
---|
1130 | r = compile_tree(qn->target, reg);
|
---|
1131 | }
|
---|
1132 | else if (!infinite && qn->greedy &&
|
---|
1133 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
1134 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
1135 | int n = qn->upper - qn->lower;
|
---|
1136 |
|
---|
1137 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1138 | if (r) return r;
|
---|
1139 |
|
---|
1140 | for (i = 0; i < n; i++) {
|
---|
1141 | r = add_opcode_rel_addr(reg, OP_PUSH,
|
---|
1142 | (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
|
---|
1143 | if (r) return r;
|
---|
1144 | r = compile_tree(qn->target, reg);
|
---|
1145 | if (r) return r;
|
---|
1146 | }
|
---|
1147 | }
|
---|
1148 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
1149 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
1150 | if (r) return r;
|
---|
1151 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
1152 | if (r) return r;
|
---|
1153 | r = compile_tree(qn->target, reg);
|
---|
1154 | }
|
---|
1155 | else {
|
---|
1156 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
1157 | }
|
---|
1158 | return r;
|
---|
1159 | }
|
---|
1160 | #endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
1161 |
|
---|
1162 | static int
|
---|
1163 | compile_length_option_node(EncloseNode* node, regex_t* reg)
|
---|
1164 | {
|
---|
1165 | int tlen;
|
---|
1166 | OnigOptionType prev = reg->options;
|
---|
1167 |
|
---|
1168 | reg->options = node->option;
|
---|
1169 | tlen = compile_length_tree(node->target, reg);
|
---|
1170 | reg->options = prev;
|
---|
1171 |
|
---|
1172 | if (tlen < 0) return tlen;
|
---|
1173 |
|
---|
1174 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
1175 | return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
|
---|
1176 | + tlen + SIZE_OP_SET_OPTION;
|
---|
1177 | }
|
---|
1178 | else
|
---|
1179 | return tlen;
|
---|
1180 | }
|
---|
1181 |
|
---|
1182 | static int
|
---|
1183 | compile_option_node(EncloseNode* node, regex_t* reg)
|
---|
1184 | {
|
---|
1185 | int r;
|
---|
1186 | OnigOptionType prev = reg->options;
|
---|
1187 |
|
---|
1188 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
1189 | r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
|
---|
1190 | if (r) return r;
|
---|
1191 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
1192 | if (r) return r;
|
---|
1193 | r = add_opcode(reg, OP_FAIL);
|
---|
1194 | if (r) return r;
|
---|
1195 | }
|
---|
1196 |
|
---|
1197 | reg->options = node->option;
|
---|
1198 | r = compile_tree(node->target, reg);
|
---|
1199 | reg->options = prev;
|
---|
1200 |
|
---|
1201 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
1202 | if (r) return r;
|
---|
1203 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
1204 | }
|
---|
1205 | return r;
|
---|
1206 | }
|
---|
1207 |
|
---|
1208 | static int
|
---|
1209 | compile_length_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
1210 | {
|
---|
1211 | int len;
|
---|
1212 | int tlen;
|
---|
1213 |
|
---|
1214 | if (node->type == ENCLOSE_OPTION)
|
---|
1215 | return compile_length_option_node(node, reg);
|
---|
1216 |
|
---|
1217 | if (node->target) {
|
---|
1218 | tlen = compile_length_tree(node->target, reg);
|
---|
1219 | if (tlen < 0) return tlen;
|
---|
1220 | }
|
---|
1221 | else
|
---|
1222 | tlen = 0;
|
---|
1223 |
|
---|
1224 | switch (node->type) {
|
---|
1225 | case ENCLOSE_MEMORY:
|
---|
1226 | #ifdef USE_SUBEXP_CALL
|
---|
1227 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
1228 | len = SIZE_OP_MEMORY_START_PUSH + tlen
|
---|
1229 | + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
|
---|
1230 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1231 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1232 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
1233 | else
|
---|
1234 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1235 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
1236 | }
|
---|
1237 | else if (IS_ENCLOSE_RECURSION(node)) {
|
---|
1238 | len = SIZE_OP_MEMORY_START_PUSH;
|
---|
1239 | len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
|
---|
1240 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
|
---|
1241 | }
|
---|
1242 | else
|
---|
1243 | #endif
|
---|
1244 | {
|
---|
1245 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
1246 | len = SIZE_OP_MEMORY_START_PUSH;
|
---|
1247 | else
|
---|
1248 | len = SIZE_OP_MEMORY_START;
|
---|
1249 |
|
---|
1250 | len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
|
---|
1251 | ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
|
---|
1252 | }
|
---|
1253 | break;
|
---|
1254 |
|
---|
1255 | case ENCLOSE_STOP_BACKTRACK:
|
---|
1256 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
1257 | QtfrNode* qn = NQTFR(node->target);
|
---|
1258 | tlen = compile_length_tree(qn->target, reg);
|
---|
1259 | if (tlen < 0) return tlen;
|
---|
1260 |
|
---|
1261 | len = tlen * qn->lower
|
---|
1262 | + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
|
---|
1263 | }
|
---|
1264 | else {
|
---|
1265 | len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
|
---|
1266 | }
|
---|
1267 | break;
|
---|
1268 |
|
---|
1269 | case ENCLOSE_CONDITION:
|
---|
1270 | len = SIZE_OP_CONDITION;
|
---|
1271 | if (NTYPE(node->target) == NT_ALT) {
|
---|
1272 | Node* x = node->target;
|
---|
1273 |
|
---|
1274 | tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
1275 | if (tlen < 0) return tlen;
|
---|
1276 | len += tlen + SIZE_OP_JUMP;
|
---|
1277 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
1278 | x = NCDR(x);
|
---|
1279 | tlen = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
1280 | if (tlen < 0) return tlen;
|
---|
1281 | len += tlen;
|
---|
1282 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
1283 | }
|
---|
1284 | else {
|
---|
1285 | return ONIGERR_PARSER_BUG;
|
---|
1286 | }
|
---|
1287 | break;
|
---|
1288 |
|
---|
1289 | case ENCLOSE_ABSENT:
|
---|
1290 | len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
|
---|
1291 | break;
|
---|
1292 |
|
---|
1293 | default:
|
---|
1294 | return ONIGERR_TYPE_BUG;
|
---|
1295 | break;
|
---|
1296 | }
|
---|
1297 |
|
---|
1298 | return len;
|
---|
1299 | }
|
---|
1300 |
|
---|
1301 | static int get_char_length_tree(Node* node, regex_t* reg, int* len);
|
---|
1302 |
|
---|
1303 | static int
|
---|
1304 | compile_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
1305 | {
|
---|
1306 | int r, len;
|
---|
1307 |
|
---|
1308 | if (node->type == ENCLOSE_OPTION)
|
---|
1309 | return compile_option_node(node, reg);
|
---|
1310 |
|
---|
1311 | switch (node->type) {
|
---|
1312 | case ENCLOSE_MEMORY:
|
---|
1313 | #ifdef USE_SUBEXP_CALL
|
---|
1314 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
1315 | r = add_opcode(reg, OP_CALL);
|
---|
1316 | if (r) return r;
|
---|
1317 | node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
|
---|
1318 | node->state |= NST_ADDR_FIXED;
|
---|
1319 | r = add_abs_addr(reg, (int )node->call_addr);
|
---|
1320 | if (r) return r;
|
---|
1321 | len = compile_length_tree(node->target, reg);
|
---|
1322 | len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
|
---|
1323 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1324 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1325 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
1326 | else
|
---|
1327 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1328 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
1329 |
|
---|
1330 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
1331 | if (r) return r;
|
---|
1332 | }
|
---|
1333 | #endif
|
---|
1334 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
1335 | r = add_opcode(reg, OP_MEMORY_START_PUSH);
|
---|
1336 | else
|
---|
1337 | r = add_opcode(reg, OP_MEMORY_START);
|
---|
1338 | if (r) return r;
|
---|
1339 | r = add_mem_num(reg, node->regnum);
|
---|
1340 | if (r) return r;
|
---|
1341 | r = compile_tree(node->target, reg);
|
---|
1342 | if (r) return r;
|
---|
1343 | #ifdef USE_SUBEXP_CALL
|
---|
1344 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
1345 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1346 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
1347 | ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
|
---|
1348 | else
|
---|
1349 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
1350 | ? OP_MEMORY_END_REC : OP_MEMORY_END));
|
---|
1351 |
|
---|
1352 | if (r) return r;
|
---|
1353 | r = add_mem_num(reg, node->regnum);
|
---|
1354 | if (r) return r;
|
---|
1355 | r = add_opcode(reg, OP_RETURN);
|
---|
1356 | }
|
---|
1357 | else if (IS_ENCLOSE_RECURSION(node)) {
|
---|
1358 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1359 | r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
|
---|
1360 | else
|
---|
1361 | r = add_opcode(reg, OP_MEMORY_END_REC);
|
---|
1362 | if (r) return r;
|
---|
1363 | r = add_mem_num(reg, node->regnum);
|
---|
1364 | }
|
---|
1365 | else
|
---|
1366 | #endif
|
---|
1367 | {
|
---|
1368 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1369 | r = add_opcode(reg, OP_MEMORY_END_PUSH);
|
---|
1370 | else
|
---|
1371 | r = add_opcode(reg, OP_MEMORY_END);
|
---|
1372 | if (r) return r;
|
---|
1373 | r = add_mem_num(reg, node->regnum);
|
---|
1374 | }
|
---|
1375 | break;
|
---|
1376 |
|
---|
1377 | case ENCLOSE_STOP_BACKTRACK:
|
---|
1378 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
1379 | QtfrNode* qn = NQTFR(node->target);
|
---|
1380 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1381 | if (r) return r;
|
---|
1382 |
|
---|
1383 | len = compile_length_tree(qn->target, reg);
|
---|
1384 | if (len < 0) return len;
|
---|
1385 |
|
---|
1386 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
|
---|
1387 | if (r) return r;
|
---|
1388 | r = compile_tree(qn->target, reg);
|
---|
1389 | if (r) return r;
|
---|
1390 | r = add_opcode(reg, OP_POP);
|
---|
1391 | if (r) return r;
|
---|
1392 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1393 | -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
|
---|
1394 | }
|
---|
1395 | else {
|
---|
1396 | r = add_opcode(reg, OP_PUSH_STOP_BT);
|
---|
1397 | if (r) return r;
|
---|
1398 | r = compile_tree(node->target, reg);
|
---|
1399 | if (r) return r;
|
---|
1400 | r = add_opcode(reg, OP_POP_STOP_BT);
|
---|
1401 | }
|
---|
1402 | break;
|
---|
1403 |
|
---|
1404 | case ENCLOSE_CONDITION:
|
---|
1405 | r = add_opcode(reg, OP_CONDITION);
|
---|
1406 | if (r) return r;
|
---|
1407 | r = add_mem_num(reg, node->regnum);
|
---|
1408 | if (r) return r;
|
---|
1409 |
|
---|
1410 | if (NTYPE(node->target) == NT_ALT) {
|
---|
1411 | Node* x = node->target;
|
---|
1412 | int len2;
|
---|
1413 |
|
---|
1414 | len = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
1415 | if (len < 0) return len;
|
---|
1416 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
1417 | x = NCDR(x);
|
---|
1418 | len2 = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
1419 | if (len2 < 0) return len2;
|
---|
1420 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
1421 |
|
---|
1422 | x = node->target;
|
---|
1423 | r = add_rel_addr(reg, len + SIZE_OP_JUMP);
|
---|
1424 | if (r) return r;
|
---|
1425 | r = compile_tree(NCAR(x), reg); /* yes-node */
|
---|
1426 | if (r) return r;
|
---|
1427 | r = add_opcode_rel_addr(reg, OP_JUMP, len2);
|
---|
1428 | if (r) return r;
|
---|
1429 | x = NCDR(x);
|
---|
1430 | r = compile_tree(NCAR(x), reg); /* no-node */
|
---|
1431 | }
|
---|
1432 | else {
|
---|
1433 | return ONIGERR_PARSER_BUG;
|
---|
1434 | }
|
---|
1435 | break;
|
---|
1436 |
|
---|
1437 | case ENCLOSE_ABSENT:
|
---|
1438 | len = compile_length_tree(node->target, reg);
|
---|
1439 | if (len < 0) return len;
|
---|
1440 |
|
---|
1441 | r = add_opcode(reg, OP_PUSH_ABSENT_POS);
|
---|
1442 | if (r) return r;
|
---|
1443 | r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
|
---|
1444 | if (r) return r;
|
---|
1445 | r = compile_tree(node->target, reg);
|
---|
1446 | if (r) return r;
|
---|
1447 | r = add_opcode(reg, OP_ABSENT_END);
|
---|
1448 | break;
|
---|
1449 |
|
---|
1450 | default:
|
---|
1451 | return ONIGERR_TYPE_BUG;
|
---|
1452 | break;
|
---|
1453 | }
|
---|
1454 |
|
---|
1455 | return r;
|
---|
1456 | }
|
---|
1457 |
|
---|
1458 | static int
|
---|
1459 | compile_length_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
1460 | {
|
---|
1461 | int len;
|
---|
1462 | int tlen = 0;
|
---|
1463 |
|
---|
1464 | if (node->target) {
|
---|
1465 | tlen = compile_length_tree(node->target, reg);
|
---|
1466 | if (tlen < 0) return tlen;
|
---|
1467 | }
|
---|
1468 |
|
---|
1469 | switch (node->type) {
|
---|
1470 | case ANCHOR_PREC_READ:
|
---|
1471 | len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
|
---|
1472 | break;
|
---|
1473 | case ANCHOR_PREC_READ_NOT:
|
---|
1474 | len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
|
---|
1475 | break;
|
---|
1476 | case ANCHOR_LOOK_BEHIND:
|
---|
1477 | len = SIZE_OP_LOOK_BEHIND + tlen;
|
---|
1478 | break;
|
---|
1479 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
1480 | len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
|
---|
1481 | break;
|
---|
1482 |
|
---|
1483 | default:
|
---|
1484 | len = SIZE_OPCODE;
|
---|
1485 | break;
|
---|
1486 | }
|
---|
1487 |
|
---|
1488 | return len;
|
---|
1489 | }
|
---|
1490 |
|
---|
1491 | static int
|
---|
1492 | compile_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
1493 | {
|
---|
1494 | int r, len;
|
---|
1495 |
|
---|
1496 | switch (node->type) {
|
---|
1497 | case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
|
---|
1498 | case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
|
---|
1499 | case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
|
---|
1500 | case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
|
---|
1501 | case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
|
---|
1502 | case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
|
---|
1503 |
|
---|
1504 | case ANCHOR_WORD_BOUND:
|
---|
1505 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
|
---|
1506 | else r = add_opcode(reg, OP_WORD_BOUND);
|
---|
1507 | break;
|
---|
1508 | case ANCHOR_NOT_WORD_BOUND:
|
---|
1509 | if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
|
---|
1510 | else r = add_opcode(reg, OP_NOT_WORD_BOUND);
|
---|
1511 | break;
|
---|
1512 | #ifdef USE_WORD_BEGIN_END
|
---|
1513 | case ANCHOR_WORD_BEGIN:
|
---|
1514 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
|
---|
1515 | else r = add_opcode(reg, OP_WORD_BEGIN);
|
---|
1516 | break;
|
---|
1517 | case ANCHOR_WORD_END:
|
---|
1518 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
|
---|
1519 | else r = add_opcode(reg, OP_WORD_END);
|
---|
1520 | break;
|
---|
1521 | #endif
|
---|
1522 | case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
|
---|
1523 |
|
---|
1524 | case ANCHOR_PREC_READ:
|
---|
1525 | r = add_opcode(reg, OP_PUSH_POS);
|
---|
1526 | if (r) return r;
|
---|
1527 | r = compile_tree(node->target, reg);
|
---|
1528 | if (r) return r;
|
---|
1529 | r = add_opcode(reg, OP_POP_POS);
|
---|
1530 | break;
|
---|
1531 |
|
---|
1532 | case ANCHOR_PREC_READ_NOT:
|
---|
1533 | len = compile_length_tree(node->target, reg);
|
---|
1534 | if (len < 0) return len;
|
---|
1535 | r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
|
---|
1536 | if (r) return r;
|
---|
1537 | r = compile_tree(node->target, reg);
|
---|
1538 | if (r) return r;
|
---|
1539 | r = add_opcode(reg, OP_FAIL_POS);
|
---|
1540 | break;
|
---|
1541 |
|
---|
1542 | case ANCHOR_LOOK_BEHIND:
|
---|
1543 | {
|
---|
1544 | int n;
|
---|
1545 | r = add_opcode(reg, OP_LOOK_BEHIND);
|
---|
1546 | if (r) return r;
|
---|
1547 | if (node->char_len < 0) {
|
---|
1548 | r = get_char_length_tree(node->target, reg, &n);
|
---|
1549 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
1550 | }
|
---|
1551 | else
|
---|
1552 | n = node->char_len;
|
---|
1553 | r = add_length(reg, n);
|
---|
1554 | if (r) return r;
|
---|
1555 | r = compile_tree(node->target, reg);
|
---|
1556 | }
|
---|
1557 | break;
|
---|
1558 |
|
---|
1559 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
1560 | {
|
---|
1561 | int n;
|
---|
1562 | len = compile_length_tree(node->target, reg);
|
---|
1563 | r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
|
---|
1564 | len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
|
---|
1565 | if (r) return r;
|
---|
1566 | if (node->char_len < 0) {
|
---|
1567 | r = get_char_length_tree(node->target, reg, &n);
|
---|
1568 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
1569 | }
|
---|
1570 | else
|
---|
1571 | n = node->char_len;
|
---|
1572 | r = add_length(reg, n);
|
---|
1573 | if (r) return r;
|
---|
1574 | r = compile_tree(node->target, reg);
|
---|
1575 | if (r) return r;
|
---|
1576 | r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
|
---|
1577 | }
|
---|
1578 | break;
|
---|
1579 |
|
---|
1580 | default:
|
---|
1581 | return ONIGERR_TYPE_BUG;
|
---|
1582 | break;
|
---|
1583 | }
|
---|
1584 |
|
---|
1585 | return r;
|
---|
1586 | }
|
---|
1587 |
|
---|
1588 | static int
|
---|
1589 | compile_length_tree(Node* node, regex_t* reg)
|
---|
1590 | {
|
---|
1591 | int len, type, r;
|
---|
1592 |
|
---|
1593 | type = NTYPE(node);
|
---|
1594 | switch (type) {
|
---|
1595 | case NT_LIST:
|
---|
1596 | len = 0;
|
---|
1597 | do {
|
---|
1598 | r = compile_length_tree(NCAR(node), reg);
|
---|
1599 | if (r < 0) return r;
|
---|
1600 | len += r;
|
---|
1601 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
1602 | r = len;
|
---|
1603 | break;
|
---|
1604 |
|
---|
1605 | case NT_ALT:
|
---|
1606 | {
|
---|
1607 | int n = 0;
|
---|
1608 | len = 0;
|
---|
1609 | do {
|
---|
1610 | r = compile_length_tree(NCAR(node), reg);
|
---|
1611 | if (r < 0) return r;
|
---|
1612 | len += r;
|
---|
1613 | n++;
|
---|
1614 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
1615 | r = len;
|
---|
1616 | r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
|
---|
1617 | }
|
---|
1618 | break;
|
---|
1619 |
|
---|
1620 | case NT_STR:
|
---|
1621 | if (NSTRING_IS_RAW(node))
|
---|
1622 | r = compile_length_string_raw_node(NSTR(node), reg);
|
---|
1623 | else
|
---|
1624 | r = compile_length_string_node(node, reg);
|
---|
1625 | break;
|
---|
1626 |
|
---|
1627 | case NT_CCLASS:
|
---|
1628 | r = compile_length_cclass_node(NCCLASS(node), reg);
|
---|
1629 | break;
|
---|
1630 |
|
---|
1631 | case NT_CTYPE:
|
---|
1632 | case NT_CANY:
|
---|
1633 | r = SIZE_OPCODE;
|
---|
1634 | break;
|
---|
1635 |
|
---|
1636 | case NT_BREF:
|
---|
1637 | {
|
---|
1638 | BRefNode* br = NBREF(node);
|
---|
1639 |
|
---|
1640 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
1641 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
1642 | r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
|
---|
1643 | SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
1644 | }
|
---|
1645 | else
|
---|
1646 | #endif
|
---|
1647 | if (br->back_num == 1) {
|
---|
1648 | r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
|
---|
1649 | ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
|
---|
1650 | }
|
---|
1651 | else {
|
---|
1652 | r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
1653 | }
|
---|
1654 | }
|
---|
1655 | break;
|
---|
1656 |
|
---|
1657 | #ifdef USE_SUBEXP_CALL
|
---|
1658 | case NT_CALL:
|
---|
1659 | r = SIZE_OP_CALL;
|
---|
1660 | break;
|
---|
1661 | #endif
|
---|
1662 |
|
---|
1663 | case NT_QTFR:
|
---|
1664 | r = compile_length_quantifier_node(NQTFR(node), reg);
|
---|
1665 | break;
|
---|
1666 |
|
---|
1667 | case NT_ENCLOSE:
|
---|
1668 | r = compile_length_enclose_node(NENCLOSE(node), reg);
|
---|
1669 | break;
|
---|
1670 |
|
---|
1671 | case NT_ANCHOR:
|
---|
1672 | r = compile_length_anchor_node(NANCHOR(node), reg);
|
---|
1673 | break;
|
---|
1674 |
|
---|
1675 | default:
|
---|
1676 | return ONIGERR_TYPE_BUG;
|
---|
1677 | break;
|
---|
1678 | }
|
---|
1679 |
|
---|
1680 | return r;
|
---|
1681 | }
|
---|
1682 |
|
---|
1683 | static int
|
---|
1684 | compile_tree(Node* node, regex_t* reg)
|
---|
1685 | {
|
---|
1686 | int n, type, len, pos, r = 0;
|
---|
1687 |
|
---|
1688 | type = NTYPE(node);
|
---|
1689 | switch (type) {
|
---|
1690 | case NT_LIST:
|
---|
1691 | do {
|
---|
1692 | r = compile_tree(NCAR(node), reg);
|
---|
1693 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1694 | break;
|
---|
1695 |
|
---|
1696 | case NT_ALT:
|
---|
1697 | {
|
---|
1698 | Node* x = node;
|
---|
1699 | len = 0;
|
---|
1700 | do {
|
---|
1701 | len += compile_length_tree(NCAR(x), reg);
|
---|
1702 | if (NCDR(x) != NULL) {
|
---|
1703 | len += SIZE_OP_PUSH + SIZE_OP_JUMP;
|
---|
1704 | }
|
---|
1705 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
1706 | pos = reg->used + len; /* goal position */
|
---|
1707 |
|
---|
1708 | do {
|
---|
1709 | len = compile_length_tree(NCAR(node), reg);
|
---|
1710 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
1711 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
|
---|
1712 | if (r) break;
|
---|
1713 | }
|
---|
1714 | r = compile_tree(NCAR(node), reg);
|
---|
1715 | if (r) break;
|
---|
1716 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
1717 | len = pos - (reg->used + SIZE_OP_JUMP);
|
---|
1718 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
1719 | if (r) break;
|
---|
1720 | }
|
---|
1721 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
1722 | }
|
---|
1723 | break;
|
---|
1724 |
|
---|
1725 | case NT_STR:
|
---|
1726 | if (NSTRING_IS_RAW(node))
|
---|
1727 | r = compile_string_raw_node(NSTR(node), reg);
|
---|
1728 | else
|
---|
1729 | r = compile_string_node(node, reg);
|
---|
1730 | break;
|
---|
1731 |
|
---|
1732 | case NT_CCLASS:
|
---|
1733 | r = compile_cclass_node(NCCLASS(node), reg);
|
---|
1734 | break;
|
---|
1735 |
|
---|
1736 | case NT_CTYPE:
|
---|
1737 | {
|
---|
1738 | int op;
|
---|
1739 |
|
---|
1740 | switch (NCTYPE(node)->ctype) {
|
---|
1741 | case ONIGENC_CTYPE_WORD:
|
---|
1742 | if (NCTYPE(node)->ascii_range != 0) {
|
---|
1743 | if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
|
---|
1744 | else op = OP_ASCII_WORD;
|
---|
1745 | }
|
---|
1746 | else {
|
---|
1747 | if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
|
---|
1748 | else op = OP_WORD;
|
---|
1749 | }
|
---|
1750 | break;
|
---|
1751 | default:
|
---|
1752 | return ONIGERR_TYPE_BUG;
|
---|
1753 | break;
|
---|
1754 | }
|
---|
1755 | r = add_opcode(reg, op);
|
---|
1756 | }
|
---|
1757 | break;
|
---|
1758 |
|
---|
1759 | case NT_CANY:
|
---|
1760 | if (IS_MULTILINE(reg->options))
|
---|
1761 | r = add_opcode(reg, OP_ANYCHAR_ML);
|
---|
1762 | else
|
---|
1763 | r = add_opcode(reg, OP_ANYCHAR);
|
---|
1764 | break;
|
---|
1765 |
|
---|
1766 | case NT_BREF:
|
---|
1767 | {
|
---|
1768 | BRefNode* br = NBREF(node);
|
---|
1769 |
|
---|
1770 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
1771 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
1772 | r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
|
---|
1773 | if (r) return r;
|
---|
1774 | r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
|
---|
1775 | if (r) return r;
|
---|
1776 | r = add_length(reg, br->nest_level);
|
---|
1777 | if (r) return r;
|
---|
1778 |
|
---|
1779 | goto add_bacref_mems;
|
---|
1780 | }
|
---|
1781 | else
|
---|
1782 | #endif
|
---|
1783 | if (br->back_num == 1) {
|
---|
1784 | n = br->back_static[0];
|
---|
1785 | if (IS_IGNORECASE(reg->options)) {
|
---|
1786 | r = add_opcode(reg, OP_BACKREFN_IC);
|
---|
1787 | if (r) return r;
|
---|
1788 | r = add_mem_num(reg, n);
|
---|
1789 | }
|
---|
1790 | else {
|
---|
1791 | switch (n) {
|
---|
1792 | case 1: r = add_opcode(reg, OP_BACKREF1); break;
|
---|
1793 | case 2: r = add_opcode(reg, OP_BACKREF2); break;
|
---|
1794 | default:
|
---|
1795 | r = add_opcode(reg, OP_BACKREFN);
|
---|
1796 | if (r) return r;
|
---|
1797 | r = add_mem_num(reg, n);
|
---|
1798 | break;
|
---|
1799 | }
|
---|
1800 | }
|
---|
1801 | }
|
---|
1802 | else {
|
---|
1803 | int i;
|
---|
1804 | int* p;
|
---|
1805 |
|
---|
1806 | if (IS_IGNORECASE(reg->options)) {
|
---|
1807 | r = add_opcode(reg, OP_BACKREF_MULTI_IC);
|
---|
1808 | }
|
---|
1809 | else {
|
---|
1810 | r = add_opcode(reg, OP_BACKREF_MULTI);
|
---|
1811 | }
|
---|
1812 | if (r) return r;
|
---|
1813 |
|
---|
1814 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
1815 | add_bacref_mems:
|
---|
1816 | #endif
|
---|
1817 | r = add_length(reg, br->back_num);
|
---|
1818 | if (r) return r;
|
---|
1819 | p = BACKREFS_P(br);
|
---|
1820 | for (i = br->back_num - 1; i >= 0; i--) {
|
---|
1821 | r = add_mem_num(reg, p[i]);
|
---|
1822 | if (r) return r;
|
---|
1823 | }
|
---|
1824 | }
|
---|
1825 | }
|
---|
1826 | break;
|
---|
1827 |
|
---|
1828 | #ifdef USE_SUBEXP_CALL
|
---|
1829 | case NT_CALL:
|
---|
1830 | r = compile_call(NCALL(node), reg);
|
---|
1831 | break;
|
---|
1832 | #endif
|
---|
1833 |
|
---|
1834 | case NT_QTFR:
|
---|
1835 | r = compile_quantifier_node(NQTFR(node), reg);
|
---|
1836 | break;
|
---|
1837 |
|
---|
1838 | case NT_ENCLOSE:
|
---|
1839 | r = compile_enclose_node(NENCLOSE(node), reg);
|
---|
1840 | break;
|
---|
1841 |
|
---|
1842 | case NT_ANCHOR:
|
---|
1843 | r = compile_anchor_node(NANCHOR(node), reg);
|
---|
1844 | break;
|
---|
1845 |
|
---|
1846 | default:
|
---|
1847 | #ifdef ONIG_DEBUG
|
---|
1848 | fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
|
---|
1849 | #endif
|
---|
1850 | break;
|
---|
1851 | }
|
---|
1852 |
|
---|
1853 | return r;
|
---|
1854 | }
|
---|
1855 |
|
---|
1856 | #ifdef USE_NAMED_GROUP
|
---|
1857 |
|
---|
1858 | static int
|
---|
1859 | noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
|
---|
1860 | {
|
---|
1861 | int r = 0;
|
---|
1862 | Node* node = *plink;
|
---|
1863 |
|
---|
1864 | switch (NTYPE(node)) {
|
---|
1865 | case NT_LIST:
|
---|
1866 | case NT_ALT:
|
---|
1867 | do {
|
---|
1868 | r = noname_disable_map(&(NCAR(node)), map, counter);
|
---|
1869 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1870 | break;
|
---|
1871 |
|
---|
1872 | case NT_QTFR:
|
---|
1873 | {
|
---|
1874 | Node** ptarget = &(NQTFR(node)->target);
|
---|
1875 | Node* old = *ptarget;
|
---|
1876 | r = noname_disable_map(ptarget, map, counter);
|
---|
1877 | if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
|
---|
1878 | onig_reduce_nested_quantifier(node, *ptarget);
|
---|
1879 | }
|
---|
1880 | }
|
---|
1881 | break;
|
---|
1882 |
|
---|
1883 | case NT_ENCLOSE:
|
---|
1884 | {
|
---|
1885 | EncloseNode* en = NENCLOSE(node);
|
---|
1886 | if (en->type == ENCLOSE_MEMORY) {
|
---|
1887 | if (IS_ENCLOSE_NAMED_GROUP(en)) {
|
---|
1888 | (*counter)++;
|
---|
1889 | map[en->regnum].new_val = *counter;
|
---|
1890 | en->regnum = *counter;
|
---|
1891 | }
|
---|
1892 | else if (en->regnum != 0) {
|
---|
1893 | *plink = en->target;
|
---|
1894 | en->target = NULL_NODE;
|
---|
1895 | onig_node_free(node);
|
---|
1896 | r = noname_disable_map(plink, map, counter);
|
---|
1897 | break;
|
---|
1898 | }
|
---|
1899 | }
|
---|
1900 | r = noname_disable_map(&(en->target), map, counter);
|
---|
1901 | }
|
---|
1902 | break;
|
---|
1903 |
|
---|
1904 | case NT_ANCHOR:
|
---|
1905 | if (NANCHOR(node)->target)
|
---|
1906 | r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
|
---|
1907 | break;
|
---|
1908 |
|
---|
1909 | default:
|
---|
1910 | break;
|
---|
1911 | }
|
---|
1912 |
|
---|
1913 | return r;
|
---|
1914 | }
|
---|
1915 |
|
---|
1916 | static int
|
---|
1917 | renumber_node_backref(Node* node, GroupNumRemap* map)
|
---|
1918 | {
|
---|
1919 | int i, pos, n, old_num;
|
---|
1920 | int *backs;
|
---|
1921 | BRefNode* bn = NBREF(node);
|
---|
1922 |
|
---|
1923 | if (! IS_BACKREF_NAME_REF(bn))
|
---|
1924 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
1925 |
|
---|
1926 | old_num = bn->back_num;
|
---|
1927 | if (IS_NULL(bn->back_dynamic))
|
---|
1928 | backs = bn->back_static;
|
---|
1929 | else
|
---|
1930 | backs = bn->back_dynamic;
|
---|
1931 |
|
---|
1932 | for (i = 0, pos = 0; i < old_num; i++) {
|
---|
1933 | n = map[backs[i]].new_val;
|
---|
1934 | if (n > 0) {
|
---|
1935 | backs[pos] = n;
|
---|
1936 | pos++;
|
---|
1937 | }
|
---|
1938 | }
|
---|
1939 |
|
---|
1940 | bn->back_num = pos;
|
---|
1941 | return 0;
|
---|
1942 | }
|
---|
1943 |
|
---|
1944 | static int
|
---|
1945 | renumber_by_map(Node* node, GroupNumRemap* map)
|
---|
1946 | {
|
---|
1947 | int r = 0;
|
---|
1948 |
|
---|
1949 | switch (NTYPE(node)) {
|
---|
1950 | case NT_LIST:
|
---|
1951 | case NT_ALT:
|
---|
1952 | do {
|
---|
1953 | r = renumber_by_map(NCAR(node), map);
|
---|
1954 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1955 | break;
|
---|
1956 | case NT_QTFR:
|
---|
1957 | r = renumber_by_map(NQTFR(node)->target, map);
|
---|
1958 | break;
|
---|
1959 | case NT_ENCLOSE:
|
---|
1960 | {
|
---|
1961 | EncloseNode* en = NENCLOSE(node);
|
---|
1962 | if (en->type == ENCLOSE_CONDITION)
|
---|
1963 | en->regnum = map[en->regnum].new_val;
|
---|
1964 | r = renumber_by_map(en->target, map);
|
---|
1965 | }
|
---|
1966 | break;
|
---|
1967 |
|
---|
1968 | case NT_BREF:
|
---|
1969 | r = renumber_node_backref(node, map);
|
---|
1970 | break;
|
---|
1971 |
|
---|
1972 | case NT_ANCHOR:
|
---|
1973 | if (NANCHOR(node)->target)
|
---|
1974 | r = renumber_by_map(NANCHOR(node)->target, map);
|
---|
1975 | break;
|
---|
1976 |
|
---|
1977 | default:
|
---|
1978 | break;
|
---|
1979 | }
|
---|
1980 |
|
---|
1981 | return r;
|
---|
1982 | }
|
---|
1983 |
|
---|
1984 | static int
|
---|
1985 | numbered_ref_check(Node* node)
|
---|
1986 | {
|
---|
1987 | int r = 0;
|
---|
1988 |
|
---|
1989 | switch (NTYPE(node)) {
|
---|
1990 | case NT_LIST:
|
---|
1991 | case NT_ALT:
|
---|
1992 | do {
|
---|
1993 | r = numbered_ref_check(NCAR(node));
|
---|
1994 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1995 | break;
|
---|
1996 | case NT_QTFR:
|
---|
1997 | r = numbered_ref_check(NQTFR(node)->target);
|
---|
1998 | break;
|
---|
1999 | case NT_ENCLOSE:
|
---|
2000 | r = numbered_ref_check(NENCLOSE(node)->target);
|
---|
2001 | break;
|
---|
2002 |
|
---|
2003 | case NT_BREF:
|
---|
2004 | if (! IS_BACKREF_NAME_REF(NBREF(node)))
|
---|
2005 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
2006 | break;
|
---|
2007 |
|
---|
2008 | case NT_ANCHOR:
|
---|
2009 | if (NANCHOR(node)->target)
|
---|
2010 | r = numbered_ref_check(NANCHOR(node)->target);
|
---|
2011 | break;
|
---|
2012 |
|
---|
2013 | default:
|
---|
2014 | break;
|
---|
2015 | }
|
---|
2016 |
|
---|
2017 | return r;
|
---|
2018 | }
|
---|
2019 |
|
---|
2020 | static int
|
---|
2021 | disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
|
---|
2022 | {
|
---|
2023 | int r, i, pos, counter;
|
---|
2024 | BitStatusType loc;
|
---|
2025 | GroupNumRemap* map;
|
---|
2026 |
|
---|
2027 | map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
|
---|
2028 | CHECK_NULL_RETURN_MEMERR(map);
|
---|
2029 | for (i = 1; i <= env->num_mem; i++) {
|
---|
2030 | map[i].new_val = 0;
|
---|
2031 | }
|
---|
2032 | counter = 0;
|
---|
2033 | r = noname_disable_map(root, map, &counter);
|
---|
2034 | if (r != 0) return r;
|
---|
2035 |
|
---|
2036 | r = renumber_by_map(*root, map);
|
---|
2037 | if (r != 0) return r;
|
---|
2038 |
|
---|
2039 | for (i = 1, pos = 1; i <= env->num_mem; i++) {
|
---|
2040 | if (map[i].new_val > 0) {
|
---|
2041 | SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
|
---|
2042 | pos++;
|
---|
2043 | }
|
---|
2044 | }
|
---|
2045 |
|
---|
2046 | loc = env->capture_history;
|
---|
2047 | BIT_STATUS_CLEAR(env->capture_history);
|
---|
2048 | for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
|
---|
2049 | if (BIT_STATUS_AT(loc, i)) {
|
---|
2050 | BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
|
---|
2051 | }
|
---|
2052 | }
|
---|
2053 |
|
---|
2054 | env->num_mem = env->num_named;
|
---|
2055 | reg->num_mem = env->num_named;
|
---|
2056 |
|
---|
2057 | return onig_renumber_name_table(reg, map);
|
---|
2058 | }
|
---|
2059 | #endif /* USE_NAMED_GROUP */
|
---|
2060 |
|
---|
2061 | #ifdef USE_SUBEXP_CALL
|
---|
2062 | static int
|
---|
2063 | unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
|
---|
2064 | {
|
---|
2065 | int i, offset;
|
---|
2066 | EncloseNode* en;
|
---|
2067 | AbsAddrType addr;
|
---|
2068 |
|
---|
2069 | for (i = 0; i < uslist->num; i++) {
|
---|
2070 | en = NENCLOSE(uslist->us[i].target);
|
---|
2071 | if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
|
---|
2072 | addr = en->call_addr;
|
---|
2073 | offset = uslist->us[i].offset;
|
---|
2074 |
|
---|
2075 | BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
|
---|
2076 | }
|
---|
2077 | return 0;
|
---|
2078 | }
|
---|
2079 | #endif
|
---|
2080 |
|
---|
2081 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
2082 | static int
|
---|
2083 | quantifiers_memory_node_info(Node* node)
|
---|
2084 | {
|
---|
2085 | int r = 0;
|
---|
2086 |
|
---|
2087 | switch (NTYPE(node)) {
|
---|
2088 | case NT_LIST:
|
---|
2089 | case NT_ALT:
|
---|
2090 | {
|
---|
2091 | int v;
|
---|
2092 | do {
|
---|
2093 | v = quantifiers_memory_node_info(NCAR(node));
|
---|
2094 | if (v > r) r = v;
|
---|
2095 | } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2096 | }
|
---|
2097 | break;
|
---|
2098 |
|
---|
2099 | # ifdef USE_SUBEXP_CALL
|
---|
2100 | case NT_CALL:
|
---|
2101 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
2102 | return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
|
---|
2103 | }
|
---|
2104 | else
|
---|
2105 | r = quantifiers_memory_node_info(NCALL(node)->target);
|
---|
2106 | break;
|
---|
2107 | # endif
|
---|
2108 |
|
---|
2109 | case NT_QTFR:
|
---|
2110 | {
|
---|
2111 | QtfrNode* qn = NQTFR(node);
|
---|
2112 | if (qn->upper != 0) {
|
---|
2113 | r = quantifiers_memory_node_info(qn->target);
|
---|
2114 | }
|
---|
2115 | }
|
---|
2116 | break;
|
---|
2117 |
|
---|
2118 | case NT_ENCLOSE:
|
---|
2119 | {
|
---|
2120 | EncloseNode* en = NENCLOSE(node);
|
---|
2121 | switch (en->type) {
|
---|
2122 | case ENCLOSE_MEMORY:
|
---|
2123 | return NQ_TARGET_IS_EMPTY_MEM;
|
---|
2124 | break;
|
---|
2125 |
|
---|
2126 | case ENCLOSE_OPTION:
|
---|
2127 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2128 | case ENCLOSE_CONDITION:
|
---|
2129 | case ENCLOSE_ABSENT:
|
---|
2130 | r = quantifiers_memory_node_info(en->target);
|
---|
2131 | break;
|
---|
2132 | default:
|
---|
2133 | break;
|
---|
2134 | }
|
---|
2135 | }
|
---|
2136 | break;
|
---|
2137 |
|
---|
2138 | case NT_BREF:
|
---|
2139 | case NT_STR:
|
---|
2140 | case NT_CTYPE:
|
---|
2141 | case NT_CCLASS:
|
---|
2142 | case NT_CANY:
|
---|
2143 | case NT_ANCHOR:
|
---|
2144 | default:
|
---|
2145 | break;
|
---|
2146 | }
|
---|
2147 |
|
---|
2148 | return r;
|
---|
2149 | }
|
---|
2150 | #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
|
---|
2151 |
|
---|
2152 | static int
|
---|
2153 | get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
|
---|
2154 | {
|
---|
2155 | OnigDistance tmin;
|
---|
2156 | int r = 0;
|
---|
2157 |
|
---|
2158 | *min = 0;
|
---|
2159 | switch (NTYPE(node)) {
|
---|
2160 | case NT_BREF:
|
---|
2161 | {
|
---|
2162 | int i;
|
---|
2163 | int* backs;
|
---|
2164 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
2165 | BRefNode* br = NBREF(node);
|
---|
2166 | if (br->state & NST_RECURSION) break;
|
---|
2167 |
|
---|
2168 | backs = BACKREFS_P(br);
|
---|
2169 | if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
2170 | r = get_min_match_length(nodes[backs[0]], min, env);
|
---|
2171 | if (r != 0) break;
|
---|
2172 | for (i = 1; i < br->back_num; i++) {
|
---|
2173 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
2174 | r = get_min_match_length(nodes[backs[i]], &tmin, env);
|
---|
2175 | if (r != 0) break;
|
---|
2176 | if (*min > tmin) *min = tmin;
|
---|
2177 | }
|
---|
2178 | }
|
---|
2179 | break;
|
---|
2180 |
|
---|
2181 | #ifdef USE_SUBEXP_CALL
|
---|
2182 | case NT_CALL:
|
---|
2183 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
2184 | EncloseNode* en = NENCLOSE(NCALL(node)->target);
|
---|
2185 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
2186 | *min = en->min_len;
|
---|
2187 | }
|
---|
2188 | else
|
---|
2189 | r = get_min_match_length(NCALL(node)->target, min, env);
|
---|
2190 | break;
|
---|
2191 | #endif
|
---|
2192 |
|
---|
2193 | case NT_LIST:
|
---|
2194 | do {
|
---|
2195 | r = get_min_match_length(NCAR(node), &tmin, env);
|
---|
2196 | if (r == 0) *min += tmin;
|
---|
2197 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2198 | break;
|
---|
2199 |
|
---|
2200 | case NT_ALT:
|
---|
2201 | {
|
---|
2202 | Node *x, *y;
|
---|
2203 | y = node;
|
---|
2204 | do {
|
---|
2205 | x = NCAR(y);
|
---|
2206 | r = get_min_match_length(x, &tmin, env);
|
---|
2207 | if (r != 0) break;
|
---|
2208 | if (y == node) *min = tmin;
|
---|
2209 | else if (*min > tmin) *min = tmin;
|
---|
2210 | } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
|
---|
2211 | }
|
---|
2212 | break;
|
---|
2213 |
|
---|
2214 | case NT_STR:
|
---|
2215 | {
|
---|
2216 | StrNode* sn = NSTR(node);
|
---|
2217 | *min = sn->end - sn->s;
|
---|
2218 | }
|
---|
2219 | break;
|
---|
2220 |
|
---|
2221 | case NT_CTYPE:
|
---|
2222 | *min = 1;
|
---|
2223 | break;
|
---|
2224 |
|
---|
2225 | case NT_CCLASS:
|
---|
2226 | case NT_CANY:
|
---|
2227 | *min = 1;
|
---|
2228 | break;
|
---|
2229 |
|
---|
2230 | case NT_QTFR:
|
---|
2231 | {
|
---|
2232 | QtfrNode* qn = NQTFR(node);
|
---|
2233 |
|
---|
2234 | if (qn->lower > 0) {
|
---|
2235 | r = get_min_match_length(qn->target, min, env);
|
---|
2236 | if (r == 0)
|
---|
2237 | *min = distance_multiply(*min, qn->lower);
|
---|
2238 | }
|
---|
2239 | }
|
---|
2240 | break;
|
---|
2241 |
|
---|
2242 | case NT_ENCLOSE:
|
---|
2243 | {
|
---|
2244 | EncloseNode* en = NENCLOSE(node);
|
---|
2245 | switch (en->type) {
|
---|
2246 | case ENCLOSE_MEMORY:
|
---|
2247 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
2248 | *min = en->min_len;
|
---|
2249 | else {
|
---|
2250 | if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
2251 | *min = 0; /* recursive */
|
---|
2252 | else {
|
---|
2253 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
2254 | r = get_min_match_length(en->target, min, env);
|
---|
2255 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
2256 | if (r == 0) {
|
---|
2257 | en->min_len = *min;
|
---|
2258 | SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
|
---|
2259 | }
|
---|
2260 | }
|
---|
2261 | }
|
---|
2262 | break;
|
---|
2263 |
|
---|
2264 | case ENCLOSE_OPTION:
|
---|
2265 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2266 | case ENCLOSE_CONDITION:
|
---|
2267 | r = get_min_match_length(en->target, min, env);
|
---|
2268 | break;
|
---|
2269 |
|
---|
2270 | case ENCLOSE_ABSENT:
|
---|
2271 | break;
|
---|
2272 | }
|
---|
2273 | }
|
---|
2274 | break;
|
---|
2275 |
|
---|
2276 | case NT_ANCHOR:
|
---|
2277 | default:
|
---|
2278 | break;
|
---|
2279 | }
|
---|
2280 |
|
---|
2281 | return r;
|
---|
2282 | }
|
---|
2283 |
|
---|
2284 | static int
|
---|
2285 | get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
|
---|
2286 | {
|
---|
2287 | OnigDistance tmax;
|
---|
2288 | int r = 0;
|
---|
2289 |
|
---|
2290 | *max = 0;
|
---|
2291 | switch (NTYPE(node)) {
|
---|
2292 | case NT_LIST:
|
---|
2293 | do {
|
---|
2294 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
2295 | if (r == 0)
|
---|
2296 | *max = distance_add(*max, tmax);
|
---|
2297 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2298 | break;
|
---|
2299 |
|
---|
2300 | case NT_ALT:
|
---|
2301 | do {
|
---|
2302 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
2303 | if (r == 0 && *max < tmax) *max = tmax;
|
---|
2304 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2305 | break;
|
---|
2306 |
|
---|
2307 | case NT_STR:
|
---|
2308 | {
|
---|
2309 | StrNode* sn = NSTR(node);
|
---|
2310 | *max = sn->end - sn->s;
|
---|
2311 | }
|
---|
2312 | break;
|
---|
2313 |
|
---|
2314 | case NT_CTYPE:
|
---|
2315 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
2316 | break;
|
---|
2317 |
|
---|
2318 | case NT_CCLASS:
|
---|
2319 | case NT_CANY:
|
---|
2320 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
2321 | break;
|
---|
2322 |
|
---|
2323 | case NT_BREF:
|
---|
2324 | {
|
---|
2325 | int i;
|
---|
2326 | int* backs;
|
---|
2327 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
2328 | BRefNode* br = NBREF(node);
|
---|
2329 | if (br->state & NST_RECURSION) {
|
---|
2330 | *max = ONIG_INFINITE_DISTANCE;
|
---|
2331 | break;
|
---|
2332 | }
|
---|
2333 | backs = BACKREFS_P(br);
|
---|
2334 | for (i = 0; i < br->back_num; i++) {
|
---|
2335 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
2336 | r = get_max_match_length(nodes[backs[i]], &tmax, env);
|
---|
2337 | if (r != 0) break;
|
---|
2338 | if (*max < tmax) *max = tmax;
|
---|
2339 | }
|
---|
2340 | }
|
---|
2341 | break;
|
---|
2342 |
|
---|
2343 | #ifdef USE_SUBEXP_CALL
|
---|
2344 | case NT_CALL:
|
---|
2345 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
2346 | r = get_max_match_length(NCALL(node)->target, max, env);
|
---|
2347 | else
|
---|
2348 | *max = ONIG_INFINITE_DISTANCE;
|
---|
2349 | break;
|
---|
2350 | #endif
|
---|
2351 |
|
---|
2352 | case NT_QTFR:
|
---|
2353 | {
|
---|
2354 | QtfrNode* qn = NQTFR(node);
|
---|
2355 |
|
---|
2356 | if (qn->upper != 0) {
|
---|
2357 | r = get_max_match_length(qn->target, max, env);
|
---|
2358 | if (r == 0 && *max != 0) {
|
---|
2359 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
2360 | *max = distance_multiply(*max, qn->upper);
|
---|
2361 | else
|
---|
2362 | *max = ONIG_INFINITE_DISTANCE;
|
---|
2363 | }
|
---|
2364 | }
|
---|
2365 | }
|
---|
2366 | break;
|
---|
2367 |
|
---|
2368 | case NT_ENCLOSE:
|
---|
2369 | {
|
---|
2370 | EncloseNode* en = NENCLOSE(node);
|
---|
2371 | switch (en->type) {
|
---|
2372 | case ENCLOSE_MEMORY:
|
---|
2373 | if (IS_ENCLOSE_MAX_FIXED(en))
|
---|
2374 | *max = en->max_len;
|
---|
2375 | else {
|
---|
2376 | if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
2377 | *max = ONIG_INFINITE_DISTANCE;
|
---|
2378 | else {
|
---|
2379 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
2380 | r = get_max_match_length(en->target, max, env);
|
---|
2381 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
2382 | if (r == 0) {
|
---|
2383 | en->max_len = *max;
|
---|
2384 | SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
|
---|
2385 | }
|
---|
2386 | }
|
---|
2387 | }
|
---|
2388 | break;
|
---|
2389 |
|
---|
2390 | case ENCLOSE_OPTION:
|
---|
2391 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2392 | case ENCLOSE_CONDITION:
|
---|
2393 | r = get_max_match_length(en->target, max, env);
|
---|
2394 | break;
|
---|
2395 |
|
---|
2396 | case ENCLOSE_ABSENT:
|
---|
2397 | break;
|
---|
2398 | }
|
---|
2399 | }
|
---|
2400 | break;
|
---|
2401 |
|
---|
2402 | case NT_ANCHOR:
|
---|
2403 | default:
|
---|
2404 | break;
|
---|
2405 | }
|
---|
2406 |
|
---|
2407 | return r;
|
---|
2408 | }
|
---|
2409 |
|
---|
2410 | #define GET_CHAR_LEN_VARLEN -1
|
---|
2411 | #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
|
---|
2412 |
|
---|
2413 | /* fixed size pattern node only */
|
---|
2414 | static int
|
---|
2415 | get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
|
---|
2416 | {
|
---|
2417 | int tlen;
|
---|
2418 | int r = 0;
|
---|
2419 |
|
---|
2420 | level++;
|
---|
2421 | *len = 0;
|
---|
2422 | switch (NTYPE(node)) {
|
---|
2423 | case NT_LIST:
|
---|
2424 | do {
|
---|
2425 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
2426 | if (r == 0)
|
---|
2427 | *len = (int )distance_add(*len, tlen);
|
---|
2428 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2429 | break;
|
---|
2430 |
|
---|
2431 | case NT_ALT:
|
---|
2432 | {
|
---|
2433 | int tlen2;
|
---|
2434 | int varlen = 0;
|
---|
2435 |
|
---|
2436 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
2437 | while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
|
---|
2438 | r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
|
---|
2439 | if (r == 0) {
|
---|
2440 | if (tlen != tlen2)
|
---|
2441 | varlen = 1;
|
---|
2442 | }
|
---|
2443 | }
|
---|
2444 | if (r == 0) {
|
---|
2445 | if (varlen != 0) {
|
---|
2446 | if (level == 1)
|
---|
2447 | r = GET_CHAR_LEN_TOP_ALT_VARLEN;
|
---|
2448 | else
|
---|
2449 | r = GET_CHAR_LEN_VARLEN;
|
---|
2450 | }
|
---|
2451 | else
|
---|
2452 | *len = tlen;
|
---|
2453 | }
|
---|
2454 | }
|
---|
2455 | break;
|
---|
2456 |
|
---|
2457 | case NT_STR:
|
---|
2458 | {
|
---|
2459 | StrNode* sn = NSTR(node);
|
---|
2460 | UChar *s = sn->s;
|
---|
2461 | while (s < sn->end) {
|
---|
2462 | s += enclen(reg->enc, s, sn->end);
|
---|
2463 | (*len)++;
|
---|
2464 | }
|
---|
2465 | }
|
---|
2466 | break;
|
---|
2467 |
|
---|
2468 | case NT_QTFR:
|
---|
2469 | {
|
---|
2470 | QtfrNode* qn = NQTFR(node);
|
---|
2471 | if (qn->lower == qn->upper) {
|
---|
2472 | r = get_char_length_tree1(qn->target, reg, &tlen, level);
|
---|
2473 | if (r == 0)
|
---|
2474 | *len = (int )distance_multiply(tlen, qn->lower);
|
---|
2475 | }
|
---|
2476 | else
|
---|
2477 | r = GET_CHAR_LEN_VARLEN;
|
---|
2478 | }
|
---|
2479 | break;
|
---|
2480 |
|
---|
2481 | #ifdef USE_SUBEXP_CALL
|
---|
2482 | case NT_CALL:
|
---|
2483 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
2484 | r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
|
---|
2485 | else
|
---|
2486 | r = GET_CHAR_LEN_VARLEN;
|
---|
2487 | break;
|
---|
2488 | #endif
|
---|
2489 |
|
---|
2490 | case NT_CTYPE:
|
---|
2491 | *len = 1;
|
---|
2492 | break;
|
---|
2493 |
|
---|
2494 | case NT_CCLASS:
|
---|
2495 | case NT_CANY:
|
---|
2496 | *len = 1;
|
---|
2497 | break;
|
---|
2498 |
|
---|
2499 | case NT_ENCLOSE:
|
---|
2500 | {
|
---|
2501 | EncloseNode* en = NENCLOSE(node);
|
---|
2502 | switch (en->type) {
|
---|
2503 | case ENCLOSE_MEMORY:
|
---|
2504 | #ifdef USE_SUBEXP_CALL
|
---|
2505 | if (IS_ENCLOSE_CLEN_FIXED(en))
|
---|
2506 | *len = en->char_len;
|
---|
2507 | else {
|
---|
2508 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
2509 | if (r == 0) {
|
---|
2510 | en->char_len = *len;
|
---|
2511 | SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
|
---|
2512 | }
|
---|
2513 | }
|
---|
2514 | break;
|
---|
2515 | #endif
|
---|
2516 | case ENCLOSE_OPTION:
|
---|
2517 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2518 | case ENCLOSE_CONDITION:
|
---|
2519 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
2520 | break;
|
---|
2521 | case ENCLOSE_ABSENT:
|
---|
2522 | default:
|
---|
2523 | break;
|
---|
2524 | }
|
---|
2525 | }
|
---|
2526 | break;
|
---|
2527 |
|
---|
2528 | case NT_ANCHOR:
|
---|
2529 | break;
|
---|
2530 |
|
---|
2531 | default:
|
---|
2532 | r = GET_CHAR_LEN_VARLEN;
|
---|
2533 | break;
|
---|
2534 | }
|
---|
2535 |
|
---|
2536 | return r;
|
---|
2537 | }
|
---|
2538 |
|
---|
2539 | static int
|
---|
2540 | get_char_length_tree(Node* node, regex_t* reg, int* len)
|
---|
2541 | {
|
---|
2542 | return get_char_length_tree1(node, reg, len, 0);
|
---|
2543 | }
|
---|
2544 |
|
---|
2545 | /* x is not included y ==> 1 : 0 */
|
---|
2546 | static int
|
---|
2547 | is_not_included(Node* x, Node* y, regex_t* reg)
|
---|
2548 | {
|
---|
2549 | int i;
|
---|
2550 | OnigDistance len;
|
---|
2551 | OnigCodePoint code;
|
---|
2552 | UChar *p;
|
---|
2553 | int ytype;
|
---|
2554 |
|
---|
2555 | retry:
|
---|
2556 | ytype = NTYPE(y);
|
---|
2557 | switch (NTYPE(x)) {
|
---|
2558 | case NT_CTYPE:
|
---|
2559 | {
|
---|
2560 | switch (ytype) {
|
---|
2561 | case NT_CTYPE:
|
---|
2562 | if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
|
---|
2563 | NCTYPE(y)->not != NCTYPE(x)->not &&
|
---|
2564 | NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
|
---|
2565 | return 1;
|
---|
2566 | else
|
---|
2567 | return 0;
|
---|
2568 | break;
|
---|
2569 |
|
---|
2570 | case NT_CCLASS:
|
---|
2571 | swap:
|
---|
2572 | {
|
---|
2573 | Node* tmp;
|
---|
2574 | tmp = x; x = y; y = tmp;
|
---|
2575 | goto retry;
|
---|
2576 | }
|
---|
2577 | break;
|
---|
2578 |
|
---|
2579 | case NT_STR:
|
---|
2580 | goto swap;
|
---|
2581 | break;
|
---|
2582 |
|
---|
2583 | default:
|
---|
2584 | break;
|
---|
2585 | }
|
---|
2586 | }
|
---|
2587 | break;
|
---|
2588 |
|
---|
2589 | case NT_CCLASS:
|
---|
2590 | {
|
---|
2591 | CClassNode* xc = NCCLASS(x);
|
---|
2592 | switch (ytype) {
|
---|
2593 | case NT_CTYPE:
|
---|
2594 | switch (NCTYPE(y)->ctype) {
|
---|
2595 | case ONIGENC_CTYPE_WORD:
|
---|
2596 | if (NCTYPE(y)->not == 0) {
|
---|
2597 | if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
|
---|
2598 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
2599 | if (BITSET_AT(xc->bs, i)) {
|
---|
2600 | if (NCTYPE(y)->ascii_range) {
|
---|
2601 | if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
|
---|
2602 | }
|
---|
2603 | else {
|
---|
2604 | if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
|
---|
2605 | }
|
---|
2606 | }
|
---|
2607 | }
|
---|
2608 | return 1;
|
---|
2609 | }
|
---|
2610 | return 0;
|
---|
2611 | }
|
---|
2612 | else {
|
---|
2613 | if (IS_NOT_NULL(xc->mbuf)) return 0;
|
---|
2614 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
2615 | int is_word;
|
---|
2616 | if (NCTYPE(y)->ascii_range)
|
---|
2617 | is_word = IS_CODE_SB_WORD(reg->enc, i);
|
---|
2618 | else
|
---|
2619 | is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
|
---|
2620 | if (! is_word) {
|
---|
2621 | if (!IS_NCCLASS_NOT(xc)) {
|
---|
2622 | if (BITSET_AT(xc->bs, i))
|
---|
2623 | return 0;
|
---|
2624 | }
|
---|
2625 | else {
|
---|
2626 | if (! BITSET_AT(xc->bs, i))
|
---|
2627 | return 0;
|
---|
2628 | }
|
---|
2629 | }
|
---|
2630 | }
|
---|
2631 | return 1;
|
---|
2632 | }
|
---|
2633 | break;
|
---|
2634 |
|
---|
2635 | default:
|
---|
2636 | break;
|
---|
2637 | }
|
---|
2638 | break;
|
---|
2639 |
|
---|
2640 | case NT_CCLASS:
|
---|
2641 | {
|
---|
2642 | int v;
|
---|
2643 | CClassNode* yc = NCCLASS(y);
|
---|
2644 |
|
---|
2645 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
2646 | v = BITSET_AT(xc->bs, i);
|
---|
2647 | if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
|
---|
2648 | (v == 0 && IS_NCCLASS_NOT(xc))) {
|
---|
2649 | v = BITSET_AT(yc->bs, i);
|
---|
2650 | if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
|
---|
2651 | (v == 0 && IS_NCCLASS_NOT(yc)))
|
---|
2652 | return 0;
|
---|
2653 | }
|
---|
2654 | }
|
---|
2655 | if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
|
---|
2656 | (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
|
---|
2657 | return 1;
|
---|
2658 | return 0;
|
---|
2659 | }
|
---|
2660 | break;
|
---|
2661 |
|
---|
2662 | case NT_STR:
|
---|
2663 | goto swap;
|
---|
2664 | break;
|
---|
2665 |
|
---|
2666 | default:
|
---|
2667 | break;
|
---|
2668 | }
|
---|
2669 | }
|
---|
2670 | break;
|
---|
2671 |
|
---|
2672 | case NT_STR:
|
---|
2673 | {
|
---|
2674 | StrNode* xs = NSTR(x);
|
---|
2675 | if (NSTRING_LEN(x) == 0)
|
---|
2676 | break;
|
---|
2677 |
|
---|
2678 | switch (ytype) {
|
---|
2679 | case NT_CTYPE:
|
---|
2680 | switch (NCTYPE(y)->ctype) {
|
---|
2681 | case ONIGENC_CTYPE_WORD:
|
---|
2682 | if (NCTYPE(y)->ascii_range) {
|
---|
2683 | if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
|
---|
2684 | return NCTYPE(y)->not;
|
---|
2685 | else
|
---|
2686 | return !(NCTYPE(y)->not);
|
---|
2687 | }
|
---|
2688 | else {
|
---|
2689 | if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
|
---|
2690 | return NCTYPE(y)->not;
|
---|
2691 | else
|
---|
2692 | return !(NCTYPE(y)->not);
|
---|
2693 | }
|
---|
2694 | break;
|
---|
2695 | default:
|
---|
2696 | break;
|
---|
2697 | }
|
---|
2698 | break;
|
---|
2699 |
|
---|
2700 | case NT_CCLASS:
|
---|
2701 | {
|
---|
2702 | CClassNode* cc = NCCLASS(y);
|
---|
2703 |
|
---|
2704 | code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
|
---|
2705 | xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
|
---|
2706 | return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
|
---|
2707 | }
|
---|
2708 | break;
|
---|
2709 |
|
---|
2710 | case NT_STR:
|
---|
2711 | {
|
---|
2712 | UChar *q;
|
---|
2713 | StrNode* ys = NSTR(y);
|
---|
2714 | len = NSTRING_LEN(x);
|
---|
2715 | if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
|
---|
2716 | if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
|
---|
2717 | /* tiny version */
|
---|
2718 | return 0;
|
---|
2719 | }
|
---|
2720 | else {
|
---|
2721 | for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
|
---|
2722 | if (*p != *q) return 1;
|
---|
2723 | }
|
---|
2724 | }
|
---|
2725 | }
|
---|
2726 | break;
|
---|
2727 |
|
---|
2728 | default:
|
---|
2729 | break;
|
---|
2730 | }
|
---|
2731 | }
|
---|
2732 | break;
|
---|
2733 |
|
---|
2734 | default:
|
---|
2735 | break;
|
---|
2736 | }
|
---|
2737 |
|
---|
2738 | return 0;
|
---|
2739 | }
|
---|
2740 |
|
---|
2741 | static Node*
|
---|
2742 | get_head_value_node(Node* node, int exact, regex_t* reg)
|
---|
2743 | {
|
---|
2744 | Node* n = NULL_NODE;
|
---|
2745 |
|
---|
2746 | switch (NTYPE(node)) {
|
---|
2747 | case NT_BREF:
|
---|
2748 | case NT_ALT:
|
---|
2749 | case NT_CANY:
|
---|
2750 | #ifdef USE_SUBEXP_CALL
|
---|
2751 | case NT_CALL:
|
---|
2752 | #endif
|
---|
2753 | break;
|
---|
2754 |
|
---|
2755 | case NT_CTYPE:
|
---|
2756 | case NT_CCLASS:
|
---|
2757 | if (exact == 0) {
|
---|
2758 | n = node;
|
---|
2759 | }
|
---|
2760 | break;
|
---|
2761 |
|
---|
2762 | case NT_LIST:
|
---|
2763 | n = get_head_value_node(NCAR(node), exact, reg);
|
---|
2764 | break;
|
---|
2765 |
|
---|
2766 | case NT_STR:
|
---|
2767 | {
|
---|
2768 | StrNode* sn = NSTR(node);
|
---|
2769 |
|
---|
2770 | if (sn->end <= sn->s)
|
---|
2771 | break;
|
---|
2772 |
|
---|
2773 | if (exact != 0 &&
|
---|
2774 | !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
|
---|
2775 | }
|
---|
2776 | else {
|
---|
2777 | n = node;
|
---|
2778 | }
|
---|
2779 | }
|
---|
2780 | break;
|
---|
2781 |
|
---|
2782 | case NT_QTFR:
|
---|
2783 | {
|
---|
2784 | QtfrNode* qn = NQTFR(node);
|
---|
2785 | if (qn->lower > 0) {
|
---|
2786 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
2787 | if (IS_NOT_NULL(qn->head_exact))
|
---|
2788 | n = qn->head_exact;
|
---|
2789 | else
|
---|
2790 | #endif
|
---|
2791 | n = get_head_value_node(qn->target, exact, reg);
|
---|
2792 | }
|
---|
2793 | }
|
---|
2794 | break;
|
---|
2795 |
|
---|
2796 | case NT_ENCLOSE:
|
---|
2797 | {
|
---|
2798 | EncloseNode* en = NENCLOSE(node);
|
---|
2799 | switch (en->type) {
|
---|
2800 | case ENCLOSE_OPTION:
|
---|
2801 | {
|
---|
2802 | OnigOptionType options = reg->options;
|
---|
2803 |
|
---|
2804 | reg->options = NENCLOSE(node)->option;
|
---|
2805 | n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
|
---|
2806 | reg->options = options;
|
---|
2807 | }
|
---|
2808 | break;
|
---|
2809 |
|
---|
2810 | case ENCLOSE_MEMORY:
|
---|
2811 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2812 | case ENCLOSE_CONDITION:
|
---|
2813 | n = get_head_value_node(en->target, exact, reg);
|
---|
2814 | break;
|
---|
2815 |
|
---|
2816 | case ENCLOSE_ABSENT:
|
---|
2817 | break;
|
---|
2818 | }
|
---|
2819 | }
|
---|
2820 | break;
|
---|
2821 |
|
---|
2822 | case NT_ANCHOR:
|
---|
2823 | if (NANCHOR(node)->type == ANCHOR_PREC_READ)
|
---|
2824 | n = get_head_value_node(NANCHOR(node)->target, exact, reg);
|
---|
2825 | break;
|
---|
2826 |
|
---|
2827 | default:
|
---|
2828 | break;
|
---|
2829 | }
|
---|
2830 |
|
---|
2831 | return n;
|
---|
2832 | }
|
---|
2833 |
|
---|
2834 | static int
|
---|
2835 | check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
|
---|
2836 | {
|
---|
2837 | int type, r = 0;
|
---|
2838 |
|
---|
2839 | type = NTYPE(node);
|
---|
2840 | if ((NTYPE2BIT(type) & type_mask) == 0)
|
---|
2841 | return 1;
|
---|
2842 |
|
---|
2843 | switch (type) {
|
---|
2844 | case NT_LIST:
|
---|
2845 | case NT_ALT:
|
---|
2846 | do {
|
---|
2847 | r = check_type_tree(NCAR(node), type_mask, enclose_mask,
|
---|
2848 | anchor_mask);
|
---|
2849 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2850 | break;
|
---|
2851 |
|
---|
2852 | case NT_QTFR:
|
---|
2853 | r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
|
---|
2854 | anchor_mask);
|
---|
2855 | break;
|
---|
2856 |
|
---|
2857 | case NT_ENCLOSE:
|
---|
2858 | {
|
---|
2859 | EncloseNode* en = NENCLOSE(node);
|
---|
2860 | if ((en->type & enclose_mask) == 0)
|
---|
2861 | return 1;
|
---|
2862 |
|
---|
2863 | r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
|
---|
2864 | }
|
---|
2865 | break;
|
---|
2866 |
|
---|
2867 | case NT_ANCHOR:
|
---|
2868 | type = NANCHOR(node)->type;
|
---|
2869 | if ((type & anchor_mask) == 0)
|
---|
2870 | return 1;
|
---|
2871 |
|
---|
2872 | if (NANCHOR(node)->target)
|
---|
2873 | r = check_type_tree(NANCHOR(node)->target,
|
---|
2874 | type_mask, enclose_mask, anchor_mask);
|
---|
2875 | break;
|
---|
2876 |
|
---|
2877 | default:
|
---|
2878 | break;
|
---|
2879 | }
|
---|
2880 | return r;
|
---|
2881 | }
|
---|
2882 |
|
---|
2883 | #ifdef USE_SUBEXP_CALL
|
---|
2884 |
|
---|
2885 | # define RECURSION_EXIST 1
|
---|
2886 | # define RECURSION_INFINITE 2
|
---|
2887 |
|
---|
2888 | static int
|
---|
2889 | subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
|
---|
2890 | {
|
---|
2891 | int type;
|
---|
2892 | int r = 0;
|
---|
2893 |
|
---|
2894 | type = NTYPE(node);
|
---|
2895 | switch (type) {
|
---|
2896 | case NT_LIST:
|
---|
2897 | {
|
---|
2898 | Node *x;
|
---|
2899 | OnigDistance min;
|
---|
2900 | int ret;
|
---|
2901 |
|
---|
2902 | x = node;
|
---|
2903 | do {
|
---|
2904 | ret = subexp_inf_recursive_check(NCAR(x), env, head);
|
---|
2905 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
2906 | r |= ret;
|
---|
2907 | if (head) {
|
---|
2908 | ret = get_min_match_length(NCAR(x), &min, env);
|
---|
2909 | if (ret != 0) return ret;
|
---|
2910 | if (min != 0) head = 0;
|
---|
2911 | }
|
---|
2912 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
2913 | }
|
---|
2914 | break;
|
---|
2915 |
|
---|
2916 | case NT_ALT:
|
---|
2917 | {
|
---|
2918 | int ret;
|
---|
2919 | r = RECURSION_EXIST;
|
---|
2920 | do {
|
---|
2921 | ret = subexp_inf_recursive_check(NCAR(node), env, head);
|
---|
2922 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
2923 | r &= ret;
|
---|
2924 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
2925 | }
|
---|
2926 | break;
|
---|
2927 |
|
---|
2928 | case NT_QTFR:
|
---|
2929 | r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
|
---|
2930 | if (r == RECURSION_EXIST) {
|
---|
2931 | if (NQTFR(node)->lower == 0) r = 0;
|
---|
2932 | }
|
---|
2933 | break;
|
---|
2934 |
|
---|
2935 | case NT_ANCHOR:
|
---|
2936 | {
|
---|
2937 | AnchorNode* an = NANCHOR(node);
|
---|
2938 | switch (an->type) {
|
---|
2939 | case ANCHOR_PREC_READ:
|
---|
2940 | case ANCHOR_PREC_READ_NOT:
|
---|
2941 | case ANCHOR_LOOK_BEHIND:
|
---|
2942 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
2943 | r = subexp_inf_recursive_check(an->target, env, head);
|
---|
2944 | break;
|
---|
2945 | }
|
---|
2946 | }
|
---|
2947 | break;
|
---|
2948 |
|
---|
2949 | case NT_CALL:
|
---|
2950 | r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
|
---|
2951 | break;
|
---|
2952 |
|
---|
2953 | case NT_ENCLOSE:
|
---|
2954 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
2955 | return 0;
|
---|
2956 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
2957 | return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
|
---|
2958 | else {
|
---|
2959 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
2960 | r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
|
---|
2961 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
2962 | }
|
---|
2963 | break;
|
---|
2964 |
|
---|
2965 | default:
|
---|
2966 | break;
|
---|
2967 | }
|
---|
2968 |
|
---|
2969 | return r;
|
---|
2970 | }
|
---|
2971 |
|
---|
2972 | static int
|
---|
2973 | subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
2974 | {
|
---|
2975 | int type;
|
---|
2976 | int r = 0;
|
---|
2977 |
|
---|
2978 | type = NTYPE(node);
|
---|
2979 | switch (type) {
|
---|
2980 | case NT_LIST:
|
---|
2981 | case NT_ALT:
|
---|
2982 | do {
|
---|
2983 | r = subexp_inf_recursive_check_trav(NCAR(node), env);
|
---|
2984 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2985 | break;
|
---|
2986 |
|
---|
2987 | case NT_QTFR:
|
---|
2988 | r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
|
---|
2989 | break;
|
---|
2990 |
|
---|
2991 | case NT_ANCHOR:
|
---|
2992 | {
|
---|
2993 | AnchorNode* an = NANCHOR(node);
|
---|
2994 | switch (an->type) {
|
---|
2995 | case ANCHOR_PREC_READ:
|
---|
2996 | case ANCHOR_PREC_READ_NOT:
|
---|
2997 | case ANCHOR_LOOK_BEHIND:
|
---|
2998 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
2999 | r = subexp_inf_recursive_check_trav(an->target, env);
|
---|
3000 | break;
|
---|
3001 | }
|
---|
3002 | }
|
---|
3003 | break;
|
---|
3004 |
|
---|
3005 | case NT_ENCLOSE:
|
---|
3006 | {
|
---|
3007 | EncloseNode* en = NENCLOSE(node);
|
---|
3008 |
|
---|
3009 | if (IS_ENCLOSE_RECURSION(en)) {
|
---|
3010 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
3011 | r = subexp_inf_recursive_check(en->target, env, 1);
|
---|
3012 | if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
|
---|
3013 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
3014 | }
|
---|
3015 | r = subexp_inf_recursive_check_trav(en->target, env);
|
---|
3016 | }
|
---|
3017 |
|
---|
3018 | break;
|
---|
3019 |
|
---|
3020 | default:
|
---|
3021 | break;
|
---|
3022 | }
|
---|
3023 |
|
---|
3024 | return r;
|
---|
3025 | }
|
---|
3026 |
|
---|
3027 | static int
|
---|
3028 | subexp_recursive_check(Node* node)
|
---|
3029 | {
|
---|
3030 | int r = 0;
|
---|
3031 |
|
---|
3032 | switch (NTYPE(node)) {
|
---|
3033 | case NT_LIST:
|
---|
3034 | case NT_ALT:
|
---|
3035 | do {
|
---|
3036 | r |= subexp_recursive_check(NCAR(node));
|
---|
3037 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
3038 | break;
|
---|
3039 |
|
---|
3040 | case NT_QTFR:
|
---|
3041 | r = subexp_recursive_check(NQTFR(node)->target);
|
---|
3042 | break;
|
---|
3043 |
|
---|
3044 | case NT_ANCHOR:
|
---|
3045 | {
|
---|
3046 | AnchorNode* an = NANCHOR(node);
|
---|
3047 | switch (an->type) {
|
---|
3048 | case ANCHOR_PREC_READ:
|
---|
3049 | case ANCHOR_PREC_READ_NOT:
|
---|
3050 | case ANCHOR_LOOK_BEHIND:
|
---|
3051 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
3052 | r = subexp_recursive_check(an->target);
|
---|
3053 | break;
|
---|
3054 | }
|
---|
3055 | }
|
---|
3056 | break;
|
---|
3057 |
|
---|
3058 | case NT_CALL:
|
---|
3059 | r = subexp_recursive_check(NCALL(node)->target);
|
---|
3060 | if (r != 0) SET_CALL_RECURSION(node);
|
---|
3061 | break;
|
---|
3062 |
|
---|
3063 | case NT_ENCLOSE:
|
---|
3064 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
3065 | return 0;
|
---|
3066 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
3067 | return 1; /* recursion */
|
---|
3068 | else {
|
---|
3069 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
3070 | r = subexp_recursive_check(NENCLOSE(node)->target);
|
---|
3071 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
3072 | }
|
---|
3073 | break;
|
---|
3074 |
|
---|
3075 | default:
|
---|
3076 | break;
|
---|
3077 | }
|
---|
3078 |
|
---|
3079 | return r;
|
---|
3080 | }
|
---|
3081 |
|
---|
3082 |
|
---|
3083 | static int
|
---|
3084 | subexp_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
3085 | {
|
---|
3086 | # define FOUND_CALLED_NODE 1
|
---|
3087 |
|
---|
3088 | int type;
|
---|
3089 | int r = 0;
|
---|
3090 |
|
---|
3091 | type = NTYPE(node);
|
---|
3092 | switch (type) {
|
---|
3093 | case NT_LIST:
|
---|
3094 | case NT_ALT:
|
---|
3095 | {
|
---|
3096 | int ret;
|
---|
3097 | do {
|
---|
3098 | ret = subexp_recursive_check_trav(NCAR(node), env);
|
---|
3099 | if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
|
---|
3100 | else if (ret < 0) return ret;
|
---|
3101 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
3102 | }
|
---|
3103 | break;
|
---|
3104 |
|
---|
3105 | case NT_QTFR:
|
---|
3106 | r = subexp_recursive_check_trav(NQTFR(node)->target, env);
|
---|
3107 | if (NQTFR(node)->upper == 0) {
|
---|
3108 | if (r == FOUND_CALLED_NODE)
|
---|
3109 | NQTFR(node)->is_refered = 1;
|
---|
3110 | }
|
---|
3111 | break;
|
---|
3112 |
|
---|
3113 | case NT_ANCHOR:
|
---|
3114 | {
|
---|
3115 | AnchorNode* an = NANCHOR(node);
|
---|
3116 | switch (an->type) {
|
---|
3117 | case ANCHOR_PREC_READ:
|
---|
3118 | case ANCHOR_PREC_READ_NOT:
|
---|
3119 | case ANCHOR_LOOK_BEHIND:
|
---|
3120 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
3121 | r = subexp_recursive_check_trav(an->target, env);
|
---|
3122 | break;
|
---|
3123 | }
|
---|
3124 | }
|
---|
3125 | break;
|
---|
3126 |
|
---|
3127 | case NT_ENCLOSE:
|
---|
3128 | {
|
---|
3129 | EncloseNode* en = NENCLOSE(node);
|
---|
3130 |
|
---|
3131 | if (! IS_ENCLOSE_RECURSION(en)) {
|
---|
3132 | if (IS_ENCLOSE_CALLED(en)) {
|
---|
3133 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
3134 | r = subexp_recursive_check(en->target);
|
---|
3135 | if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
|
---|
3136 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
3137 | }
|
---|
3138 | }
|
---|
3139 | r = subexp_recursive_check_trav(en->target, env);
|
---|
3140 | if (IS_ENCLOSE_CALLED(en))
|
---|
3141 | r |= FOUND_CALLED_NODE;
|
---|
3142 | }
|
---|
3143 | break;
|
---|
3144 |
|
---|
3145 | default:
|
---|
3146 | break;
|
---|
3147 | }
|
---|
3148 |
|
---|
3149 | return r;
|
---|
3150 | }
|
---|
3151 |
|
---|
3152 | static int
|
---|
3153 | setup_subexp_call(Node* node, ScanEnv* env)
|
---|
3154 | {
|
---|
3155 | int type;
|
---|
3156 | int r = 0;
|
---|
3157 |
|
---|
3158 | type = NTYPE(node);
|
---|
3159 | switch (type) {
|
---|
3160 | case NT_LIST:
|
---|
3161 | do {
|
---|
3162 | r = setup_subexp_call(NCAR(node), env);
|
---|
3163 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3164 | break;
|
---|
3165 |
|
---|
3166 | case NT_ALT:
|
---|
3167 | do {
|
---|
3168 | r = setup_subexp_call(NCAR(node), env);
|
---|
3169 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3170 | break;
|
---|
3171 |
|
---|
3172 | case NT_QTFR:
|
---|
3173 | r = setup_subexp_call(NQTFR(node)->target, env);
|
---|
3174 | break;
|
---|
3175 | case NT_ENCLOSE:
|
---|
3176 | r = setup_subexp_call(NENCLOSE(node)->target, env);
|
---|
3177 | break;
|
---|
3178 |
|
---|
3179 | case NT_CALL:
|
---|
3180 | {
|
---|
3181 | CallNode* cn = NCALL(node);
|
---|
3182 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
3183 |
|
---|
3184 | if (cn->group_num != 0) {
|
---|
3185 | int gnum = cn->group_num;
|
---|
3186 |
|
---|
3187 | # ifdef USE_NAMED_GROUP
|
---|
3188 | if (env->num_named > 0 &&
|
---|
3189 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
3190 | !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
3191 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
3192 | }
|
---|
3193 | # endif
|
---|
3194 | if (gnum > env->num_mem) {
|
---|
3195 | onig_scan_env_set_error_string(env,
|
---|
3196 | ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
|
---|
3197 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;
|
---|
3198 | }
|
---|
3199 |
|
---|
3200 | # ifdef USE_NAMED_GROUP
|
---|
3201 | set_call_attr:
|
---|
3202 | # endif
|
---|
3203 | cn->target = nodes[cn->group_num];
|
---|
3204 | if (IS_NULL(cn->target)) {
|
---|
3205 | onig_scan_env_set_error_string(env,
|
---|
3206 | ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
|
---|
3207 | return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
---|
3208 | }
|
---|
3209 | SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
|
---|
3210 | BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
|
---|
3211 | cn->unset_addr_list = env->unset_addr_list;
|
---|
3212 | }
|
---|
3213 | # ifdef USE_NAMED_GROUP
|
---|
3214 | # ifdef USE_PERL_SUBEXP_CALL
|
---|
3215 | else if (cn->name == cn->name_end) {
|
---|
3216 | goto set_call_attr;
|
---|
3217 | }
|
---|
3218 | # endif
|
---|
3219 | else {
|
---|
3220 | int *refs;
|
---|
3221 |
|
---|
3222 | int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
|
---|
3223 | &refs);
|
---|
3224 | if (n <= 0) {
|
---|
3225 | onig_scan_env_set_error_string(env,
|
---|
3226 | ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
|
---|
3227 | return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
---|
3228 | }
|
---|
3229 | else if (n > 1 &&
|
---|
3230 | ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
|
---|
3231 | onig_scan_env_set_error_string(env,
|
---|
3232 | ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
|
---|
3233 | return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
|
---|
3234 | }
|
---|
3235 | else {
|
---|
3236 | cn->group_num = refs[0];
|
---|
3237 | goto set_call_attr;
|
---|
3238 | }
|
---|
3239 | }
|
---|
3240 | # endif
|
---|
3241 | }
|
---|
3242 | break;
|
---|
3243 |
|
---|
3244 | case NT_ANCHOR:
|
---|
3245 | {
|
---|
3246 | AnchorNode* an = NANCHOR(node);
|
---|
3247 |
|
---|
3248 | switch (an->type) {
|
---|
3249 | case ANCHOR_PREC_READ:
|
---|
3250 | case ANCHOR_PREC_READ_NOT:
|
---|
3251 | case ANCHOR_LOOK_BEHIND:
|
---|
3252 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
3253 | r = setup_subexp_call(an->target, env);
|
---|
3254 | break;
|
---|
3255 | }
|
---|
3256 | }
|
---|
3257 | break;
|
---|
3258 |
|
---|
3259 | default:
|
---|
3260 | break;
|
---|
3261 | }
|
---|
3262 |
|
---|
3263 | return r;
|
---|
3264 | }
|
---|
3265 | #endif
|
---|
3266 |
|
---|
3267 | /* divide different length alternatives in look-behind.
|
---|
3268 | (?<=A|B) ==> (?<=A)|(?<=B)
|
---|
3269 | (?<!A|B) ==> (?<!A)(?<!B)
|
---|
3270 | */
|
---|
3271 | static int
|
---|
3272 | divide_look_behind_alternatives(Node* node)
|
---|
3273 | {
|
---|
3274 | Node *head, *np, *insert_node;
|
---|
3275 | AnchorNode* an = NANCHOR(node);
|
---|
3276 | int anc_type = an->type;
|
---|
3277 |
|
---|
3278 | head = an->target;
|
---|
3279 | np = NCAR(head);
|
---|
3280 | swap_node(node, head);
|
---|
3281 | NCAR(node) = head;
|
---|
3282 | NANCHOR(head)->target = np;
|
---|
3283 |
|
---|
3284 | np = node;
|
---|
3285 | while ((np = NCDR(np)) != NULL_NODE) {
|
---|
3286 | insert_node = onig_node_new_anchor(anc_type);
|
---|
3287 | CHECK_NULL_RETURN_MEMERR(insert_node);
|
---|
3288 | NANCHOR(insert_node)->target = NCAR(np);
|
---|
3289 | NCAR(np) = insert_node;
|
---|
3290 | }
|
---|
3291 |
|
---|
3292 | if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
|
---|
3293 | np = node;
|
---|
3294 | do {
|
---|
3295 | SET_NTYPE(np, NT_LIST); /* alt -> list */
|
---|
3296 | } while ((np = NCDR(np)) != NULL_NODE);
|
---|
3297 | }
|
---|
3298 | return 0;
|
---|
3299 | }
|
---|
3300 |
|
---|
3301 | static int
|
---|
3302 | setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
|
---|
3303 | {
|
---|
3304 | int r, len;
|
---|
3305 | AnchorNode* an = NANCHOR(node);
|
---|
3306 |
|
---|
3307 | r = get_char_length_tree(an->target, reg, &len);
|
---|
3308 | if (r == 0)
|
---|
3309 | an->char_len = len;
|
---|
3310 | else if (r == GET_CHAR_LEN_VARLEN)
|
---|
3311 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
3312 | else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
|
---|
3313 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
|
---|
3314 | r = divide_look_behind_alternatives(node);
|
---|
3315 | else
|
---|
3316 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
3317 | }
|
---|
3318 |
|
---|
3319 | return r;
|
---|
3320 | }
|
---|
3321 |
|
---|
3322 | static int
|
---|
3323 | next_setup(Node* node, Node* next_node, regex_t* reg)
|
---|
3324 | {
|
---|
3325 | int type;
|
---|
3326 |
|
---|
3327 | retry:
|
---|
3328 | type = NTYPE(node);
|
---|
3329 | if (type == NT_QTFR) {
|
---|
3330 | QtfrNode* qn = NQTFR(node);
|
---|
3331 | if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
|
---|
3332 | #ifdef USE_QTFR_PEEK_NEXT
|
---|
3333 | Node* n = get_head_value_node(next_node, 1, reg);
|
---|
3334 | /* '\0': for UTF-16BE etc... */
|
---|
3335 | if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
|
---|
3336 | qn->next_head_exact = n;
|
---|
3337 | }
|
---|
3338 | #endif
|
---|
3339 | /* automatic possessification a*b ==> (?>a*)b */
|
---|
3340 | if (qn->lower <= 1) {
|
---|
3341 | int ttype = NTYPE(qn->target);
|
---|
3342 | if (IS_NODE_TYPE_SIMPLE(ttype)) {
|
---|
3343 | Node *x, *y;
|
---|
3344 | x = get_head_value_node(qn->target, 0, reg);
|
---|
3345 | if (IS_NOT_NULL(x)) {
|
---|
3346 | y = get_head_value_node(next_node, 0, reg);
|
---|
3347 | if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
|
---|
3348 | Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
|
---|
3349 | CHECK_NULL_RETURN_MEMERR(en);
|
---|
3350 | SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
|
---|
3351 | swap_node(node, en);
|
---|
3352 | NENCLOSE(node)->target = en;
|
---|
3353 | }
|
---|
3354 | }
|
---|
3355 | }
|
---|
3356 | }
|
---|
3357 | }
|
---|
3358 | }
|
---|
3359 | else if (type == NT_ENCLOSE) {
|
---|
3360 | EncloseNode* en = NENCLOSE(node);
|
---|
3361 | if (en->type == ENCLOSE_MEMORY) {
|
---|
3362 | node = en->target;
|
---|
3363 | goto retry;
|
---|
3364 | }
|
---|
3365 | }
|
---|
3366 | return 0;
|
---|
3367 | }
|
---|
3368 |
|
---|
3369 |
|
---|
3370 | static int
|
---|
3371 | update_string_node_case_fold(regex_t* reg, Node *node)
|
---|
3372 | {
|
---|
3373 | UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
3374 | UChar *sbuf, *ebuf, *sp;
|
---|
3375 | int r, i, len;
|
---|
3376 | OnigDistance sbuf_size;
|
---|
3377 | StrNode* sn = NSTR(node);
|
---|
3378 |
|
---|
3379 | end = sn->end;
|
---|
3380 | sbuf_size = (end - sn->s) * 2;
|
---|
3381 | sbuf = (UChar* )xmalloc(sbuf_size);
|
---|
3382 | CHECK_NULL_RETURN_MEMERR(sbuf);
|
---|
3383 | ebuf = sbuf + sbuf_size;
|
---|
3384 |
|
---|
3385 | sp = sbuf;
|
---|
3386 | p = sn->s;
|
---|
3387 | while (p < end) {
|
---|
3388 | len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
|
---|
3389 | for (i = 0; i < len; i++) {
|
---|
3390 | if (sp >= ebuf) {
|
---|
3391 | UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
|
---|
3392 | if (IS_NULL(p)) {
|
---|
3393 | xfree(sbuf);
|
---|
3394 | return ONIGERR_MEMORY;
|
---|
3395 | }
|
---|
3396 | sbuf = p;
|
---|
3397 | sp = sbuf + sbuf_size;
|
---|
3398 | sbuf_size *= 2;
|
---|
3399 | ebuf = sbuf + sbuf_size;
|
---|
3400 | }
|
---|
3401 |
|
---|
3402 | *sp++ = buf[i];
|
---|
3403 | }
|
---|
3404 | }
|
---|
3405 |
|
---|
3406 | r = onig_node_str_set(node, sbuf, sp);
|
---|
3407 |
|
---|
3408 | xfree(sbuf);
|
---|
3409 | return r;
|
---|
3410 | }
|
---|
3411 |
|
---|
3412 | static int
|
---|
3413 | expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
|
---|
3414 | regex_t* reg)
|
---|
3415 | {
|
---|
3416 | int r;
|
---|
3417 | Node *node;
|
---|
3418 |
|
---|
3419 | node = onig_node_new_str(s, end);
|
---|
3420 | if (IS_NULL(node)) return ONIGERR_MEMORY;
|
---|
3421 |
|
---|
3422 | r = update_string_node_case_fold(reg, node);
|
---|
3423 | if (r != 0) {
|
---|
3424 | onig_node_free(node);
|
---|
3425 | return r;
|
---|
3426 | }
|
---|
3427 |
|
---|
3428 | NSTRING_SET_AMBIG(node);
|
---|
3429 | NSTRING_SET_DONT_GET_OPT_INFO(node);
|
---|
3430 | *rnode = node;
|
---|
3431 | return 0;
|
---|
3432 | }
|
---|
3433 |
|
---|
3434 | static int
|
---|
3435 | is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
|
---|
3436 | int slen)
|
---|
3437 | {
|
---|
3438 | int i;
|
---|
3439 |
|
---|
3440 | for (i = 0; i < item_num; i++) {
|
---|
3441 | if (items[i].byte_len != slen) {
|
---|
3442 | return 1;
|
---|
3443 | }
|
---|
3444 | if (items[i].code_len != 1) {
|
---|
3445 | return 1;
|
---|
3446 | }
|
---|
3447 | }
|
---|
3448 | return 0;
|
---|
3449 | }
|
---|
3450 |
|
---|
3451 | static int
|
---|
3452 | expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
---|
3453 | UChar *p, int slen, UChar *end,
|
---|
3454 | regex_t* reg, Node **rnode)
|
---|
3455 | {
|
---|
3456 | int r, i, j, len, varlen;
|
---|
3457 | Node *anode, *var_anode, *snode, *xnode, *an;
|
---|
3458 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
---|
3459 |
|
---|
3460 | *rnode = var_anode = NULL_NODE;
|
---|
3461 |
|
---|
3462 | varlen = 0;
|
---|
3463 | for (i = 0; i < item_num; i++) {
|
---|
3464 | if (items[i].byte_len != slen) {
|
---|
3465 | varlen = 1;
|
---|
3466 | break;
|
---|
3467 | }
|
---|
3468 | }
|
---|
3469 |
|
---|
3470 | if (varlen != 0) {
|
---|
3471 | *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3472 | if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
|
---|
3473 |
|
---|
3474 | xnode = onig_node_new_list(NULL, NULL);
|
---|
3475 | if (IS_NULL(xnode)) goto mem_err;
|
---|
3476 | NCAR(var_anode) = xnode;
|
---|
3477 |
|
---|
3478 | anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3479 | if (IS_NULL(anode)) goto mem_err;
|
---|
3480 | NCAR(xnode) = anode;
|
---|
3481 | }
|
---|
3482 | else {
|
---|
3483 | *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3484 | if (IS_NULL(anode)) return ONIGERR_MEMORY;
|
---|
3485 | }
|
---|
3486 |
|
---|
3487 | snode = onig_node_new_str(p, p + slen);
|
---|
3488 | if (IS_NULL(snode)) goto mem_err;
|
---|
3489 |
|
---|
3490 | NCAR(anode) = snode;
|
---|
3491 |
|
---|
3492 | for (i = 0; i < item_num; i++) {
|
---|
3493 | snode = onig_node_new_str(NULL, NULL);
|
---|
3494 | if (IS_NULL(snode)) goto mem_err;
|
---|
3495 |
|
---|
3496 | for (j = 0; j < items[i].code_len; j++) {
|
---|
3497 | len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
|
---|
3498 | if (len < 0) {
|
---|
3499 | r = len;
|
---|
3500 | goto mem_err2;
|
---|
3501 | }
|
---|
3502 |
|
---|
3503 | r = onig_node_str_cat(snode, buf, buf + len);
|
---|
3504 | if (r != 0) goto mem_err2;
|
---|
3505 | }
|
---|
3506 |
|
---|
3507 | an = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3508 | if (IS_NULL(an)) {
|
---|
3509 | goto mem_err2;
|
---|
3510 | }
|
---|
3511 |
|
---|
3512 | if (items[i].byte_len != slen) {
|
---|
3513 | Node *rem;
|
---|
3514 | UChar *q = p + items[i].byte_len;
|
---|
3515 |
|
---|
3516 | if (q < end) {
|
---|
3517 | r = expand_case_fold_make_rem_string(&rem, q, end, reg);
|
---|
3518 | if (r != 0) {
|
---|
3519 | onig_node_free(an);
|
---|
3520 | goto mem_err2;
|
---|
3521 | }
|
---|
3522 |
|
---|
3523 | xnode = onig_node_list_add(NULL_NODE, snode);
|
---|
3524 | if (IS_NULL(xnode)) {
|
---|
3525 | onig_node_free(an);
|
---|
3526 | onig_node_free(rem);
|
---|
3527 | goto mem_err2;
|
---|
3528 | }
|
---|
3529 | if (IS_NULL(onig_node_list_add(xnode, rem))) {
|
---|
3530 | onig_node_free(an);
|
---|
3531 | onig_node_free(xnode);
|
---|
3532 | onig_node_free(rem);
|
---|
3533 | goto mem_err;
|
---|
3534 | }
|
---|
3535 |
|
---|
3536 | NCAR(an) = xnode;
|
---|
3537 | }
|
---|
3538 | else {
|
---|
3539 | NCAR(an) = snode;
|
---|
3540 | }
|
---|
3541 |
|
---|
3542 | NCDR(var_anode) = an;
|
---|
3543 | var_anode = an;
|
---|
3544 | }
|
---|
3545 | else {
|
---|
3546 | NCAR(an) = snode;
|
---|
3547 | NCDR(anode) = an;
|
---|
3548 | anode = an;
|
---|
3549 | }
|
---|
3550 | }
|
---|
3551 |
|
---|
3552 | return varlen;
|
---|
3553 |
|
---|
3554 | mem_err2:
|
---|
3555 | onig_node_free(snode);
|
---|
3556 |
|
---|
3557 | mem_err:
|
---|
3558 | onig_node_free(*rnode);
|
---|
3559 |
|
---|
3560 | return ONIGERR_MEMORY;
|
---|
3561 | }
|
---|
3562 |
|
---|
3563 | static int
|
---|
3564 | expand_case_fold_string(Node* node, regex_t* reg)
|
---|
3565 | {
|
---|
3566 | #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
|
---|
3567 |
|
---|
3568 | int r, n, len, alt_num;
|
---|
3569 | int varlen = 0;
|
---|
3570 | UChar *start, *end, *p;
|
---|
3571 | Node *top_root, *root, *snode, *prev_node;
|
---|
3572 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
3573 | StrNode* sn = NSTR(node);
|
---|
3574 |
|
---|
3575 | if (NSTRING_IS_AMBIG(node)) return 0;
|
---|
3576 |
|
---|
3577 | start = sn->s;
|
---|
3578 | end = sn->end;
|
---|
3579 | if (start >= end) return 0;
|
---|
3580 |
|
---|
3581 | r = 0;
|
---|
3582 | top_root = root = prev_node = snode = NULL_NODE;
|
---|
3583 | alt_num = 1;
|
---|
3584 | p = start;
|
---|
3585 | while (p < end) {
|
---|
3586 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
|
---|
3587 | p, end, items);
|
---|
3588 | if (n < 0) {
|
---|
3589 | r = n;
|
---|
3590 | goto err;
|
---|
3591 | }
|
---|
3592 |
|
---|
3593 | len = enclen(reg->enc, p, end);
|
---|
3594 |
|
---|
3595 | varlen = is_case_fold_variable_len(n, items, len);
|
---|
3596 | if (n == 0 || varlen == 0) {
|
---|
3597 | if (IS_NULL(snode)) {
|
---|
3598 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
3599 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
3600 | if (IS_NULL(root)) {
|
---|
3601 | onig_node_free(prev_node);
|
---|
3602 | goto mem_err;
|
---|
3603 | }
|
---|
3604 | }
|
---|
3605 |
|
---|
3606 | prev_node = snode = onig_node_new_str(NULL, NULL);
|
---|
3607 | if (IS_NULL(snode)) goto mem_err;
|
---|
3608 | if (IS_NOT_NULL(root)) {
|
---|
3609 | if (IS_NULL(onig_node_list_add(root, snode))) {
|
---|
3610 | onig_node_free(snode);
|
---|
3611 | goto mem_err;
|
---|
3612 | }
|
---|
3613 | }
|
---|
3614 | }
|
---|
3615 |
|
---|
3616 | r = onig_node_str_cat(snode, p, p + len);
|
---|
3617 | if (r != 0) goto err;
|
---|
3618 | }
|
---|
3619 | else {
|
---|
3620 | alt_num *= (n + 1);
|
---|
3621 | if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
|
---|
3622 |
|
---|
3623 | if (IS_NOT_NULL(snode)) {
|
---|
3624 | r = update_string_node_case_fold(reg, snode);
|
---|
3625 | if (r == 0) {
|
---|
3626 | NSTRING_SET_AMBIG(snode);
|
---|
3627 | }
|
---|
3628 | }
|
---|
3629 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
3630 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
3631 | if (IS_NULL(root)) {
|
---|
3632 | onig_node_free(prev_node);
|
---|
3633 | goto mem_err;
|
---|
3634 | }
|
---|
3635 | }
|
---|
3636 |
|
---|
3637 | r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
|
---|
3638 | if (r < 0) goto mem_err;
|
---|
3639 | if (r == 1) {
|
---|
3640 | if (IS_NULL(root)) {
|
---|
3641 | top_root = prev_node;
|
---|
3642 | }
|
---|
3643 | else {
|
---|
3644 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
3645 | onig_node_free(prev_node);
|
---|
3646 | goto mem_err;
|
---|
3647 | }
|
---|
3648 | }
|
---|
3649 |
|
---|
3650 | root = NCAR(prev_node);
|
---|
3651 | }
|
---|
3652 | else { /* r == 0 */
|
---|
3653 | if (IS_NOT_NULL(root)) {
|
---|
3654 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
3655 | onig_node_free(prev_node);
|
---|
3656 | goto mem_err;
|
---|
3657 | }
|
---|
3658 | }
|
---|
3659 | }
|
---|
3660 |
|
---|
3661 | snode = NULL_NODE;
|
---|
3662 | }
|
---|
3663 |
|
---|
3664 | p += len;
|
---|
3665 | }
|
---|
3666 | if (IS_NOT_NULL(snode)) {
|
---|
3667 | r = update_string_node_case_fold(reg, snode);
|
---|
3668 | if (r == 0) {
|
---|
3669 | NSTRING_SET_AMBIG(snode);
|
---|
3670 | }
|
---|
3671 | }
|
---|
3672 |
|
---|
3673 | if (p < end) {
|
---|
3674 | Node *srem;
|
---|
3675 |
|
---|
3676 | r = expand_case_fold_make_rem_string(&srem, p, end, reg);
|
---|
3677 | if (r != 0) goto mem_err;
|
---|
3678 |
|
---|
3679 | if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
|
---|
3680 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
3681 | if (IS_NULL(root)) {
|
---|
3682 | onig_node_free(srem);
|
---|
3683 | onig_node_free(prev_node);
|
---|
3684 | goto mem_err;
|
---|
3685 | }
|
---|
3686 | }
|
---|
3687 |
|
---|
3688 | if (IS_NULL(root)) {
|
---|
3689 | prev_node = srem;
|
---|
3690 | }
|
---|
3691 | else {
|
---|
3692 | if (IS_NULL(onig_node_list_add(root, srem))) {
|
---|
3693 | onig_node_free(srem);
|
---|
3694 | goto mem_err;
|
---|
3695 | }
|
---|
3696 | }
|
---|
3697 | }
|
---|
3698 |
|
---|
3699 | /* ending */
|
---|
3700 | top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
|
---|
3701 | swap_node(node, top_root);
|
---|
3702 | onig_node_free(top_root);
|
---|
3703 | return 0;
|
---|
3704 |
|
---|
3705 | mem_err:
|
---|
3706 | r = ONIGERR_MEMORY;
|
---|
3707 |
|
---|
3708 | err:
|
---|
3709 | onig_node_free(top_root);
|
---|
3710 | return r;
|
---|
3711 | }
|
---|
3712 |
|
---|
3713 |
|
---|
3714 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
3715 |
|
---|
3716 | # define CEC_THRES_NUM_BIG_REPEAT 512
|
---|
3717 | # define CEC_INFINITE_NUM 0x7fffffff
|
---|
3718 |
|
---|
3719 | # define CEC_IN_INFINITE_REPEAT (1<<0)
|
---|
3720 | # define CEC_IN_FINITE_REPEAT (1<<1)
|
---|
3721 | # define CEC_CONT_BIG_REPEAT (1<<2)
|
---|
3722 |
|
---|
3723 | static int
|
---|
3724 | setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
---|
3725 | {
|
---|
3726 | int type;
|
---|
3727 | int r = state;
|
---|
3728 |
|
---|
3729 | type = NTYPE(node);
|
---|
3730 | switch (type) {
|
---|
3731 | case NT_LIST:
|
---|
3732 | {
|
---|
3733 | Node* prev = NULL_NODE;
|
---|
3734 | do {
|
---|
3735 | r = setup_comb_exp_check(NCAR(node), r, env);
|
---|
3736 | prev = NCAR(node);
|
---|
3737 | } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3738 | }
|
---|
3739 | break;
|
---|
3740 |
|
---|
3741 | case NT_ALT:
|
---|
3742 | {
|
---|
3743 | int ret;
|
---|
3744 | do {
|
---|
3745 | ret = setup_comb_exp_check(NCAR(node), state, env);
|
---|
3746 | r |= ret;
|
---|
3747 | } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3748 | }
|
---|
3749 | break;
|
---|
3750 |
|
---|
3751 | case NT_QTFR:
|
---|
3752 | {
|
---|
3753 | int child_state = state;
|
---|
3754 | int add_state = 0;
|
---|
3755 | QtfrNode* qn = NQTFR(node);
|
---|
3756 | Node* target = qn->target;
|
---|
3757 | int var_num;
|
---|
3758 |
|
---|
3759 | if (! IS_REPEAT_INFINITE(qn->upper)) {
|
---|
3760 | if (qn->upper > 1) {
|
---|
3761 | /* {0,1}, {1,1} are allowed */
|
---|
3762 | child_state |= CEC_IN_FINITE_REPEAT;
|
---|
3763 |
|
---|
3764 | /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
|
---|
3765 | if (env->backrefed_mem == 0) {
|
---|
3766 | if (NTYPE(qn->target) == NT_ENCLOSE) {
|
---|
3767 | EncloseNode* en = NENCLOSE(qn->target);
|
---|
3768 | if (en->type == ENCLOSE_MEMORY) {
|
---|
3769 | if (NTYPE(en->target) == NT_QTFR) {
|
---|
3770 | QtfrNode* q = NQTFR(en->target);
|
---|
3771 | if (IS_REPEAT_INFINITE(q->upper)
|
---|
3772 | && q->greedy == qn->greedy) {
|
---|
3773 | qn->upper = (qn->lower == 0 ? 1 : qn->lower);
|
---|
3774 | if (qn->upper == 1)
|
---|
3775 | child_state = state;
|
---|
3776 | }
|
---|
3777 | }
|
---|
3778 | }
|
---|
3779 | }
|
---|
3780 | }
|
---|
3781 | }
|
---|
3782 | }
|
---|
3783 |
|
---|
3784 | if (state & CEC_IN_FINITE_REPEAT) {
|
---|
3785 | qn->comb_exp_check_num = -1;
|
---|
3786 | }
|
---|
3787 | else {
|
---|
3788 | if (IS_REPEAT_INFINITE(qn->upper)) {
|
---|
3789 | var_num = CEC_INFINITE_NUM;
|
---|
3790 | child_state |= CEC_IN_INFINITE_REPEAT;
|
---|
3791 | }
|
---|
3792 | else {
|
---|
3793 | var_num = qn->upper - qn->lower;
|
---|
3794 | }
|
---|
3795 |
|
---|
3796 | if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
|
---|
3797 | add_state |= CEC_CONT_BIG_REPEAT;
|
---|
3798 |
|
---|
3799 | if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
|
---|
3800 | ((state & CEC_CONT_BIG_REPEAT) != 0 &&
|
---|
3801 | var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
|
---|
3802 | if (qn->comb_exp_check_num == 0) {
|
---|
3803 | env->num_comb_exp_check++;
|
---|
3804 | qn->comb_exp_check_num = env->num_comb_exp_check;
|
---|
3805 | if (env->curr_max_regnum > env->comb_exp_max_regnum)
|
---|
3806 | env->comb_exp_max_regnum = env->curr_max_regnum;
|
---|
3807 | }
|
---|
3808 | }
|
---|
3809 | }
|
---|
3810 |
|
---|
3811 | r = setup_comb_exp_check(target, child_state, env);
|
---|
3812 | r |= add_state;
|
---|
3813 | }
|
---|
3814 | break;
|
---|
3815 |
|
---|
3816 | case NT_ENCLOSE:
|
---|
3817 | {
|
---|
3818 | EncloseNode* en = NENCLOSE(node);
|
---|
3819 |
|
---|
3820 | switch (en->type) {
|
---|
3821 | case ENCLOSE_MEMORY:
|
---|
3822 | {
|
---|
3823 | if (env->curr_max_regnum < en->regnum)
|
---|
3824 | env->curr_max_regnum = en->regnum;
|
---|
3825 |
|
---|
3826 | r = setup_comb_exp_check(en->target, state, env);
|
---|
3827 | }
|
---|
3828 | break;
|
---|
3829 |
|
---|
3830 | default:
|
---|
3831 | r = setup_comb_exp_check(en->target, state, env);
|
---|
3832 | break;
|
---|
3833 | }
|
---|
3834 | }
|
---|
3835 | break;
|
---|
3836 |
|
---|
3837 | # ifdef USE_SUBEXP_CALL
|
---|
3838 | case NT_CALL:
|
---|
3839 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
3840 | env->has_recursion = 1;
|
---|
3841 | else
|
---|
3842 | r = setup_comb_exp_check(NCALL(node)->target, state, env);
|
---|
3843 | break;
|
---|
3844 | # endif
|
---|
3845 |
|
---|
3846 | default:
|
---|
3847 | break;
|
---|
3848 | }
|
---|
3849 |
|
---|
3850 | return r;
|
---|
3851 | }
|
---|
3852 | #endif
|
---|
3853 |
|
---|
3854 | #define IN_ALT (1<<0)
|
---|
3855 | #define IN_NOT (1<<1)
|
---|
3856 | #define IN_REPEAT (1<<2)
|
---|
3857 | #define IN_VAR_REPEAT (1<<3)
|
---|
3858 | #define IN_CALL (1<<4)
|
---|
3859 | #define IN_RECCALL (1<<5)
|
---|
3860 |
|
---|
3861 | /* setup_tree does the following work.
|
---|
3862 | 1. check empty loop. (set qn->target_empty_info)
|
---|
3863 | 2. expand ignore-case in char class.
|
---|
3864 | 3. set memory status bit flags. (reg->mem_stats)
|
---|
3865 | 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
|
---|
3866 | 5. find invalid patterns in look-behind.
|
---|
3867 | 6. expand repeated string.
|
---|
3868 | */
|
---|
3869 | static int
|
---|
3870 | setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
---|
3871 | {
|
---|
3872 | int type;
|
---|
3873 | int r = 0;
|
---|
3874 |
|
---|
3875 | restart:
|
---|
3876 | type = NTYPE(node);
|
---|
3877 | switch (type) {
|
---|
3878 | case NT_LIST:
|
---|
3879 | {
|
---|
3880 | Node* prev = NULL_NODE;
|
---|
3881 | do {
|
---|
3882 | r = setup_tree(NCAR(node), reg, state, env);
|
---|
3883 | if (IS_NOT_NULL(prev) && r == 0) {
|
---|
3884 | r = next_setup(prev, NCAR(node), reg);
|
---|
3885 | }
|
---|
3886 | prev = NCAR(node);
|
---|
3887 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3888 | }
|
---|
3889 | break;
|
---|
3890 |
|
---|
3891 | case NT_ALT:
|
---|
3892 | do {
|
---|
3893 | r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
|
---|
3894 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3895 | break;
|
---|
3896 |
|
---|
3897 | case NT_CCLASS:
|
---|
3898 | break;
|
---|
3899 |
|
---|
3900 | case NT_STR:
|
---|
3901 | if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
|
---|
3902 | r = expand_case_fold_string(node, reg);
|
---|
3903 | }
|
---|
3904 | break;
|
---|
3905 |
|
---|
3906 | case NT_CTYPE:
|
---|
3907 | case NT_CANY:
|
---|
3908 | break;
|
---|
3909 |
|
---|
3910 | #ifdef USE_SUBEXP_CALL
|
---|
3911 | case NT_CALL:
|
---|
3912 | break;
|
---|
3913 | #endif
|
---|
3914 |
|
---|
3915 | case NT_BREF:
|
---|
3916 | {
|
---|
3917 | int i;
|
---|
3918 | int* p;
|
---|
3919 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
3920 | BRefNode* br = NBREF(node);
|
---|
3921 | p = BACKREFS_P(br);
|
---|
3922 | for (i = 0; i < br->back_num; i++) {
|
---|
3923 | if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
3924 | BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
|
---|
3925 | BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
|
---|
3926 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
3927 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
3928 | BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
|
---|
3929 | }
|
---|
3930 | #endif
|
---|
3931 | SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
|
---|
3932 | }
|
---|
3933 | }
|
---|
3934 | break;
|
---|
3935 |
|
---|
3936 | case NT_QTFR:
|
---|
3937 | {
|
---|
3938 | OnigDistance d;
|
---|
3939 | QtfrNode* qn = NQTFR(node);
|
---|
3940 | Node* target = qn->target;
|
---|
3941 |
|
---|
3942 | if ((state & IN_REPEAT) != 0) {
|
---|
3943 | qn->state |= NST_IN_REPEAT;
|
---|
3944 | }
|
---|
3945 |
|
---|
3946 | if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
|
---|
3947 | r = get_min_match_length(target, &d, env);
|
---|
3948 | if (r) break;
|
---|
3949 | if (d == 0) {
|
---|
3950 | qn->target_empty_info = NQ_TARGET_IS_EMPTY;
|
---|
3951 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
3952 | r = quantifiers_memory_node_info(target);
|
---|
3953 | if (r < 0) break;
|
---|
3954 | if (r > 0) {
|
---|
3955 | qn->target_empty_info = r;
|
---|
3956 | }
|
---|
3957 | #endif
|
---|
3958 | #if 0
|
---|
3959 | r = get_max_match_length(target, &d, env);
|
---|
3960 | if (r == 0 && d == 0) {
|
---|
3961 | /* ()* ==> ()?, ()+ ==> () */
|
---|
3962 | qn->upper = 1;
|
---|
3963 | if (qn->lower > 1) qn->lower = 1;
|
---|
3964 | if (NTYPE(target) == NT_STR) {
|
---|
3965 | qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
|
---|
3966 | }
|
---|
3967 | }
|
---|
3968 | #endif
|
---|
3969 | }
|
---|
3970 | }
|
---|
3971 |
|
---|
3972 | state |= IN_REPEAT;
|
---|
3973 | if (qn->lower != qn->upper)
|
---|
3974 | state |= IN_VAR_REPEAT;
|
---|
3975 | r = setup_tree(target, reg, state, env);
|
---|
3976 | if (r) break;
|
---|
3977 |
|
---|
3978 | /* expand string */
|
---|
3979 | #define EXPAND_STRING_MAX_LENGTH 100
|
---|
3980 | if (NTYPE(target) == NT_STR) {
|
---|
3981 | if (qn->lower > 1) {
|
---|
3982 | int i, n = qn->lower;
|
---|
3983 | OnigDistance len = NSTRING_LEN(target);
|
---|
3984 | StrNode* sn = NSTR(target);
|
---|
3985 | Node* np;
|
---|
3986 |
|
---|
3987 | np = onig_node_new_str(sn->s, sn->end);
|
---|
3988 | if (IS_NULL(np)) return ONIGERR_MEMORY;
|
---|
3989 | NSTR(np)->flag = sn->flag;
|
---|
3990 |
|
---|
3991 | for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
|
---|
3992 | r = onig_node_str_cat(np, sn->s, sn->end);
|
---|
3993 | if (r) {
|
---|
3994 | onig_node_free(np);
|
---|
3995 | return r;
|
---|
3996 | }
|
---|
3997 | }
|
---|
3998 | if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
|
---|
3999 | Node *np1, *np2;
|
---|
4000 |
|
---|
4001 | qn->lower -= i;
|
---|
4002 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
4003 | qn->upper -= i;
|
---|
4004 |
|
---|
4005 | np1 = onig_node_new_list(np, NULL);
|
---|
4006 | if (IS_NULL(np1)) {
|
---|
4007 | onig_node_free(np);
|
---|
4008 | return ONIGERR_MEMORY;
|
---|
4009 | }
|
---|
4010 | swap_node(np1, node);
|
---|
4011 | np2 = onig_node_list_add(node, np1);
|
---|
4012 | if (IS_NULL(np2)) {
|
---|
4013 | onig_node_free(np1);
|
---|
4014 | return ONIGERR_MEMORY;
|
---|
4015 | }
|
---|
4016 | }
|
---|
4017 | else {
|
---|
4018 | swap_node(np, node);
|
---|
4019 | onig_node_free(np);
|
---|
4020 | }
|
---|
4021 | break; /* break case NT_QTFR: */
|
---|
4022 | }
|
---|
4023 | }
|
---|
4024 |
|
---|
4025 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
4026 | if (qn->greedy && (qn->target_empty_info != 0)) {
|
---|
4027 | if (NTYPE(target) == NT_QTFR) {
|
---|
4028 | QtfrNode* tqn = NQTFR(target);
|
---|
4029 | if (IS_NOT_NULL(tqn->head_exact)) {
|
---|
4030 | qn->head_exact = tqn->head_exact;
|
---|
4031 | tqn->head_exact = NULL;
|
---|
4032 | }
|
---|
4033 | }
|
---|
4034 | else {
|
---|
4035 | qn->head_exact = get_head_value_node(qn->target, 1, reg);
|
---|
4036 | }
|
---|
4037 | }
|
---|
4038 | #endif
|
---|
4039 | }
|
---|
4040 | break;
|
---|
4041 |
|
---|
4042 | case NT_ENCLOSE:
|
---|
4043 | {
|
---|
4044 | EncloseNode* en = NENCLOSE(node);
|
---|
4045 |
|
---|
4046 | switch (en->type) {
|
---|
4047 | case ENCLOSE_OPTION:
|
---|
4048 | {
|
---|
4049 | OnigOptionType options = reg->options;
|
---|
4050 | reg->options = NENCLOSE(node)->option;
|
---|
4051 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
4052 | reg->options = options;
|
---|
4053 | }
|
---|
4054 | break;
|
---|
4055 |
|
---|
4056 | case ENCLOSE_MEMORY:
|
---|
4057 | if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
|
---|
4058 | BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
|
---|
4059 | /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
|
---|
4060 | }
|
---|
4061 | if (IS_ENCLOSE_CALLED(en))
|
---|
4062 | state |= IN_CALL;
|
---|
4063 | if (IS_ENCLOSE_RECURSION(en))
|
---|
4064 | state |= IN_RECCALL;
|
---|
4065 | else if ((state & IN_RECCALL) != 0)
|
---|
4066 | SET_CALL_RECURSION(node);
|
---|
4067 | r = setup_tree(en->target, reg, state, env);
|
---|
4068 | break;
|
---|
4069 |
|
---|
4070 | case ENCLOSE_STOP_BACKTRACK:
|
---|
4071 | {
|
---|
4072 | Node* target = en->target;
|
---|
4073 | r = setup_tree(target, reg, state, env);
|
---|
4074 | if (NTYPE(target) == NT_QTFR) {
|
---|
4075 | QtfrNode* tqn = NQTFR(target);
|
---|
4076 | if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
|
---|
4077 | tqn->greedy != 0) { /* (?>a*), a*+ etc... */
|
---|
4078 | int qtype = NTYPE(tqn->target);
|
---|
4079 | if (IS_NODE_TYPE_SIMPLE(qtype))
|
---|
4080 | SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
|
---|
4081 | }
|
---|
4082 | }
|
---|
4083 | }
|
---|
4084 | break;
|
---|
4085 |
|
---|
4086 | case ENCLOSE_CONDITION:
|
---|
4087 | #ifdef USE_NAMED_GROUP
|
---|
4088 | if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
|
---|
4089 | env->num_named > 0 &&
|
---|
4090 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
4091 | !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
4092 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
4093 | }
|
---|
4094 | #endif
|
---|
4095 | if (NENCLOSE(node)->regnum > env->num_mem)
|
---|
4096 | return ONIGERR_INVALID_BACKREF;
|
---|
4097 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
4098 | break;
|
---|
4099 |
|
---|
4100 | case ENCLOSE_ABSENT:
|
---|
4101 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
4102 | break;
|
---|
4103 | }
|
---|
4104 | }
|
---|
4105 | break;
|
---|
4106 |
|
---|
4107 | case NT_ANCHOR:
|
---|
4108 | {
|
---|
4109 | AnchorNode* an = NANCHOR(node);
|
---|
4110 |
|
---|
4111 | switch (an->type) {
|
---|
4112 | case ANCHOR_PREC_READ:
|
---|
4113 | r = setup_tree(an->target, reg, state, env);
|
---|
4114 | break;
|
---|
4115 | case ANCHOR_PREC_READ_NOT:
|
---|
4116 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
4117 | break;
|
---|
4118 |
|
---|
4119 | /* allowed node types in look-behind */
|
---|
4120 | #define ALLOWED_TYPE_IN_LB \
|
---|
4121 | ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
|
---|
4122 | BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
|
---|
4123 |
|
---|
4124 | #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
|
---|
4125 | #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
|
---|
4126 |
|
---|
4127 | #define ALLOWED_ANCHOR_IN_LB \
|
---|
4128 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
4129 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
4130 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
4131 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
4132 | #define ALLOWED_ANCHOR_IN_LB_NOT \
|
---|
4133 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
4134 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
4135 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
4136 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
4137 |
|
---|
4138 | case ANCHOR_LOOK_BEHIND:
|
---|
4139 | {
|
---|
4140 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
4141 | ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
|
---|
4142 | if (r < 0) return r;
|
---|
4143 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
4144 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
4145 | r = setup_tree(an->target, reg, state, env);
|
---|
4146 | if (r != 0) return r;
|
---|
4147 | r = setup_look_behind(node, reg, env);
|
---|
4148 | }
|
---|
4149 | break;
|
---|
4150 |
|
---|
4151 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
4152 | {
|
---|
4153 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
4154 | ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
|
---|
4155 | if (r < 0) return r;
|
---|
4156 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
4157 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
4158 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
4159 | if (r != 0) return r;
|
---|
4160 | r = setup_look_behind(node, reg, env);
|
---|
4161 | }
|
---|
4162 | break;
|
---|
4163 | }
|
---|
4164 | }
|
---|
4165 | break;
|
---|
4166 |
|
---|
4167 | default:
|
---|
4168 | break;
|
---|
4169 | }
|
---|
4170 |
|
---|
4171 | return r;
|
---|
4172 | }
|
---|
4173 |
|
---|
4174 | #ifndef USE_SUNDAY_QUICK_SEARCH
|
---|
4175 | /* set skip map for Boyer-Moore search */
|
---|
4176 | static int
|
---|
4177 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
4178 | UChar skip[], int** int_skip, int ignore_case)
|
---|
4179 | {
|
---|
4180 | OnigDistance i, len;
|
---|
4181 | int clen, flen, n, j, k;
|
---|
4182 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
4183 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
4184 | OnigEncoding enc = reg->enc;
|
---|
4185 |
|
---|
4186 | len = end - s;
|
---|
4187 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
4188 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
|
---|
4189 |
|
---|
4190 | n = 0;
|
---|
4191 | for (i = 0; i < len - 1; i += clen) {
|
---|
4192 | p = s + i;
|
---|
4193 | if (ignore_case)
|
---|
4194 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4195 | p, end, items);
|
---|
4196 | clen = enclen(enc, p, end);
|
---|
4197 | if (p + clen > end)
|
---|
4198 | clen = (int )(end - p);
|
---|
4199 |
|
---|
4200 | for (j = 0; j < n; j++) {
|
---|
4201 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4202 | return 1; /* different length isn't supported. */
|
---|
4203 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4204 | if (flen != clen)
|
---|
4205 | return 1; /* different length isn't supported. */
|
---|
4206 | }
|
---|
4207 | for (j = 0; j < clen; j++) {
|
---|
4208 | skip[s[i + j]] = (UChar )(len - 1 - i - j);
|
---|
4209 | for (k = 0; k < n; k++) {
|
---|
4210 | skip[buf[k][j]] = (UChar )(len - 1 - i - j);
|
---|
4211 | }
|
---|
4212 | }
|
---|
4213 | }
|
---|
4214 | }
|
---|
4215 | else {
|
---|
4216 | # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
|
---|
4217 | /* This should not happen. */
|
---|
4218 | return ONIGERR_TYPE_BUG;
|
---|
4219 | # else
|
---|
4220 | if (IS_NULL(*int_skip)) {
|
---|
4221 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
4222 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
4223 | }
|
---|
4224 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
|
---|
4225 |
|
---|
4226 | n = 0;
|
---|
4227 | for (i = 0; i < len - 1; i += clen) {
|
---|
4228 | p = s + i;
|
---|
4229 | if (ignore_case)
|
---|
4230 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4231 | p, end, items);
|
---|
4232 | clen = enclen(enc, p, end);
|
---|
4233 | if (p + clen > end)
|
---|
4234 | clen = (int )(end - p);
|
---|
4235 |
|
---|
4236 | for (j = 0; j < n; j++) {
|
---|
4237 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4238 | return 1; /* different length isn't supported. */
|
---|
4239 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4240 | if (flen != clen)
|
---|
4241 | return 1; /* different length isn't supported. */
|
---|
4242 | }
|
---|
4243 | for (j = 0; j < clen; j++) {
|
---|
4244 | (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
|
---|
4245 | for (k = 0; k < n; k++) {
|
---|
4246 | (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
|
---|
4247 | }
|
---|
4248 | }
|
---|
4249 | }
|
---|
4250 | # endif
|
---|
4251 | }
|
---|
4252 | return 0;
|
---|
4253 | }
|
---|
4254 |
|
---|
4255 | #else /* USE_SUNDAY_QUICK_SEARCH */
|
---|
4256 |
|
---|
4257 | /* set skip map for Sunday's quick search */
|
---|
4258 | static int
|
---|
4259 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
4260 | UChar skip[], int** int_skip, int ignore_case)
|
---|
4261 | {
|
---|
4262 | OnigDistance i, len;
|
---|
4263 | int clen, flen, n, j, k;
|
---|
4264 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
4265 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
4266 | OnigEncoding enc = reg->enc;
|
---|
4267 |
|
---|
4268 | len = end - s;
|
---|
4269 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
4270 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
|
---|
4271 |
|
---|
4272 | n = 0;
|
---|
4273 | for (i = 0; i < len; i += clen) {
|
---|
4274 | p = s + i;
|
---|
4275 | if (ignore_case)
|
---|
4276 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4277 | p, end, items);
|
---|
4278 | clen = enclen(enc, p, end);
|
---|
4279 | if (p + clen > end)
|
---|
4280 | clen = (int )(end - p);
|
---|
4281 |
|
---|
4282 | for (j = 0; j < n; j++) {
|
---|
4283 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4284 | return 1; /* different length isn't supported. */
|
---|
4285 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4286 | if (flen != clen)
|
---|
4287 | return 1; /* different length isn't supported. */
|
---|
4288 | }
|
---|
4289 | for (j = 0; j < clen; j++) {
|
---|
4290 | skip[s[i + j]] = (UChar )(len - i - j);
|
---|
4291 | for (k = 0; k < n; k++) {
|
---|
4292 | skip[buf[k][j]] = (UChar )(len - i - j);
|
---|
4293 | }
|
---|
4294 | }
|
---|
4295 | }
|
---|
4296 | }
|
---|
4297 | else {
|
---|
4298 | # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
|
---|
4299 | /* This should not happen. */
|
---|
4300 | return ONIGERR_TYPE_BUG;
|
---|
4301 | # else
|
---|
4302 | if (IS_NULL(*int_skip)) {
|
---|
4303 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
4304 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
4305 | }
|
---|
4306 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
|
---|
4307 |
|
---|
4308 | n = 0;
|
---|
4309 | for (i = 0; i < len; i += clen) {
|
---|
4310 | p = s + i;
|
---|
4311 | if (ignore_case)
|
---|
4312 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4313 | p, end, items);
|
---|
4314 | clen = enclen(enc, p, end);
|
---|
4315 | if (p + clen > end)
|
---|
4316 | clen = (int )(end - p);
|
---|
4317 |
|
---|
4318 | for (j = 0; j < n; j++) {
|
---|
4319 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4320 | return 1; /* different length isn't supported. */
|
---|
4321 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4322 | if (flen != clen)
|
---|
4323 | return 1; /* different length isn't supported. */
|
---|
4324 | }
|
---|
4325 | for (j = 0; j < clen; j++) {
|
---|
4326 | (*int_skip)[s[i + j]] = (int )(len - i - j);
|
---|
4327 | for (k = 0; k < n; k++) {
|
---|
4328 | (*int_skip)[buf[k][j]] = (int )(len - i - j);
|
---|
4329 | }
|
---|
4330 | }
|
---|
4331 | }
|
---|
4332 | # endif
|
---|
4333 | }
|
---|
4334 | return 0;
|
---|
4335 | }
|
---|
4336 | #endif /* USE_SUNDAY_QUICK_SEARCH */
|
---|
4337 |
|
---|
4338 | typedef struct {
|
---|
4339 | OnigDistance min; /* min byte length */
|
---|
4340 | OnigDistance max; /* max byte length */
|
---|
4341 | } MinMaxLen;
|
---|
4342 |
|
---|
4343 | typedef struct {
|
---|
4344 | MinMaxLen mmd;
|
---|
4345 | OnigEncoding enc;
|
---|
4346 | OnigOptionType options;
|
---|
4347 | OnigCaseFoldType case_fold_flag;
|
---|
4348 | ScanEnv* scan_env;
|
---|
4349 | } OptEnv;
|
---|
4350 |
|
---|
4351 | typedef struct {
|
---|
4352 | int left_anchor;
|
---|
4353 | int right_anchor;
|
---|
4354 | } OptAncInfo;
|
---|
4355 |
|
---|
4356 | typedef struct {
|
---|
4357 | MinMaxLen mmd; /* info position */
|
---|
4358 | OptAncInfo anc;
|
---|
4359 |
|
---|
4360 | int reach_end;
|
---|
4361 | int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
|
---|
4362 | int len;
|
---|
4363 | UChar s[OPT_EXACT_MAXLEN];
|
---|
4364 | } OptExactInfo;
|
---|
4365 |
|
---|
4366 | typedef struct {
|
---|
4367 | MinMaxLen mmd; /* info position */
|
---|
4368 | OptAncInfo anc;
|
---|
4369 |
|
---|
4370 | int value; /* weighted value */
|
---|
4371 | UChar map[ONIG_CHAR_TABLE_SIZE];
|
---|
4372 | } OptMapInfo;
|
---|
4373 |
|
---|
4374 | typedef struct {
|
---|
4375 | MinMaxLen len;
|
---|
4376 |
|
---|
4377 | OptAncInfo anc;
|
---|
4378 | OptExactInfo exb; /* boundary */
|
---|
4379 | OptExactInfo exm; /* middle */
|
---|
4380 | OptExactInfo expr; /* prec read (?=...) */
|
---|
4381 |
|
---|
4382 | OptMapInfo map; /* boundary */
|
---|
4383 | } NodeOptInfo;
|
---|
4384 |
|
---|
4385 |
|
---|
4386 | static int
|
---|
4387 | map_position_value(OnigEncoding enc, int i)
|
---|
4388 | {
|
---|
4389 | static const short int ByteValTable[] = {
|
---|
4390 | 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
|
---|
4391 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
---|
4392 | 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
|
---|
4393 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
|
---|
4394 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
4395 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
|
---|
4396 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
4397 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
---|
4398 | };
|
---|
4399 |
|
---|
4400 | if (i < numberof(ByteValTable)) {
|
---|
4401 | if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
|
---|
4402 | return 20;
|
---|
4403 | else
|
---|
4404 | return (int )ByteValTable[i];
|
---|
4405 | }
|
---|
4406 | else
|
---|
4407 | return 4; /* Take it easy. */
|
---|
4408 | }
|
---|
4409 |
|
---|
4410 | static int
|
---|
4411 | distance_value(MinMaxLen* mm)
|
---|
4412 | {
|
---|
4413 | /* 1000 / (min-max-dist + 1) */
|
---|
4414 | static const short int dist_vals[] = {
|
---|
4415 | 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
|
---|
4416 | 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
|
---|
4417 | 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
|
---|
4418 | 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
|
---|
4419 | 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
|
---|
4420 | 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
|
---|
4421 | 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
|
---|
4422 | 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
|
---|
4423 | 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
|
---|
4424 | 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
|
---|
4425 | };
|
---|
4426 |
|
---|
4427 | OnigDistance d;
|
---|
4428 |
|
---|
4429 | if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
|
---|
4430 |
|
---|
4431 | d = mm->max - mm->min;
|
---|
4432 | if (d < numberof(dist_vals))
|
---|
4433 | /* return dist_vals[d] * 16 / (mm->min + 12); */
|
---|
4434 | return (int )dist_vals[d];
|
---|
4435 | else
|
---|
4436 | return 1;
|
---|
4437 | }
|
---|
4438 |
|
---|
4439 | static int
|
---|
4440 | comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
|
---|
4441 | {
|
---|
4442 | if (v2 <= 0) return -1;
|
---|
4443 | if (v1 <= 0) return 1;
|
---|
4444 |
|
---|
4445 | v1 *= distance_value(d1);
|
---|
4446 | v2 *= distance_value(d2);
|
---|
4447 |
|
---|
4448 | if (v2 > v1) return 1;
|
---|
4449 | if (v2 < v1) return -1;
|
---|
4450 |
|
---|
4451 | if (d2->min < d1->min) return 1;
|
---|
4452 | if (d2->min > d1->min) return -1;
|
---|
4453 | return 0;
|
---|
4454 | }
|
---|
4455 |
|
---|
4456 | static int
|
---|
4457 | is_equal_mml(MinMaxLen* a, MinMaxLen* b)
|
---|
4458 | {
|
---|
4459 | return (a->min == b->min && a->max == b->max) ? 1 : 0;
|
---|
4460 | }
|
---|
4461 |
|
---|
4462 |
|
---|
4463 | static void
|
---|
4464 | set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
|
---|
4465 | {
|
---|
4466 | mml->min = min;
|
---|
4467 | mml->max = max;
|
---|
4468 | }
|
---|
4469 |
|
---|
4470 | static void
|
---|
4471 | clear_mml(MinMaxLen* mml)
|
---|
4472 | {
|
---|
4473 | mml->min = mml->max = 0;
|
---|
4474 | }
|
---|
4475 |
|
---|
4476 | static void
|
---|
4477 | copy_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
4478 | {
|
---|
4479 | to->min = from->min;
|
---|
4480 | to->max = from->max;
|
---|
4481 | }
|
---|
4482 |
|
---|
4483 | static void
|
---|
4484 | add_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
4485 | {
|
---|
4486 | to->min = distance_add(to->min, from->min);
|
---|
4487 | to->max = distance_add(to->max, from->max);
|
---|
4488 | }
|
---|
4489 |
|
---|
4490 | #if 0
|
---|
4491 | static void
|
---|
4492 | add_len_mml(MinMaxLen* to, OnigDistance len)
|
---|
4493 | {
|
---|
4494 | to->min = distance_add(to->min, len);
|
---|
4495 | to->max = distance_add(to->max, len);
|
---|
4496 | }
|
---|
4497 | #endif
|
---|
4498 |
|
---|
4499 | static void
|
---|
4500 | alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
4501 | {
|
---|
4502 | if (to->min > from->min) to->min = from->min;
|
---|
4503 | if (to->max < from->max) to->max = from->max;
|
---|
4504 | }
|
---|
4505 |
|
---|
4506 | static void
|
---|
4507 | copy_opt_env(OptEnv* to, OptEnv* from)
|
---|
4508 | {
|
---|
4509 | *to = *from;
|
---|
4510 | }
|
---|
4511 |
|
---|
4512 | static void
|
---|
4513 | clear_opt_anc_info(OptAncInfo* anc)
|
---|
4514 | {
|
---|
4515 | anc->left_anchor = 0;
|
---|
4516 | anc->right_anchor = 0;
|
---|
4517 | }
|
---|
4518 |
|
---|
4519 | static void
|
---|
4520 | copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
|
---|
4521 | {
|
---|
4522 | *to = *from;
|
---|
4523 | }
|
---|
4524 |
|
---|
4525 | static void
|
---|
4526 | concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
|
---|
4527 | OnigDistance left_len, OnigDistance right_len)
|
---|
4528 | {
|
---|
4529 | clear_opt_anc_info(to);
|
---|
4530 |
|
---|
4531 | to->left_anchor = left->left_anchor;
|
---|
4532 | if (left_len == 0) {
|
---|
4533 | to->left_anchor |= right->left_anchor;
|
---|
4534 | }
|
---|
4535 |
|
---|
4536 | to->right_anchor = right->right_anchor;
|
---|
4537 | if (right_len == 0) {
|
---|
4538 | to->right_anchor |= left->right_anchor;
|
---|
4539 | }
|
---|
4540 | else {
|
---|
4541 | to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
|
---|
4542 | }
|
---|
4543 | }
|
---|
4544 |
|
---|
4545 | static int
|
---|
4546 | is_left_anchor(int anc)
|
---|
4547 | {
|
---|
4548 | if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
|
---|
4549 | anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
|
---|
4550 | anc == ANCHOR_PREC_READ_NOT)
|
---|
4551 | return 0;
|
---|
4552 |
|
---|
4553 | return 1;
|
---|
4554 | }
|
---|
4555 |
|
---|
4556 | static int
|
---|
4557 | is_set_opt_anc_info(OptAncInfo* to, int anc)
|
---|
4558 | {
|
---|
4559 | if ((to->left_anchor & anc) != 0) return 1;
|
---|
4560 |
|
---|
4561 | return ((to->right_anchor & anc) != 0 ? 1 : 0);
|
---|
4562 | }
|
---|
4563 |
|
---|
4564 | static void
|
---|
4565 | add_opt_anc_info(OptAncInfo* to, int anc)
|
---|
4566 | {
|
---|
4567 | if (is_left_anchor(anc))
|
---|
4568 | to->left_anchor |= anc;
|
---|
4569 | else
|
---|
4570 | to->right_anchor |= anc;
|
---|
4571 | }
|
---|
4572 |
|
---|
4573 | static void
|
---|
4574 | remove_opt_anc_info(OptAncInfo* to, int anc)
|
---|
4575 | {
|
---|
4576 | if (is_left_anchor(anc))
|
---|
4577 | to->left_anchor &= ~anc;
|
---|
4578 | else
|
---|
4579 | to->right_anchor &= ~anc;
|
---|
4580 | }
|
---|
4581 |
|
---|
4582 | static void
|
---|
4583 | alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
|
---|
4584 | {
|
---|
4585 | to->left_anchor &= add->left_anchor;
|
---|
4586 | to->right_anchor &= add->right_anchor;
|
---|
4587 | }
|
---|
4588 |
|
---|
4589 | static int
|
---|
4590 | is_full_opt_exact_info(OptExactInfo* ex)
|
---|
4591 | {
|
---|
4592 | return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
|
---|
4593 | }
|
---|
4594 |
|
---|
4595 | static void
|
---|
4596 | clear_opt_exact_info(OptExactInfo* ex)
|
---|
4597 | {
|
---|
4598 | clear_mml(&ex->mmd);
|
---|
4599 | clear_opt_anc_info(&ex->anc);
|
---|
4600 | ex->reach_end = 0;
|
---|
4601 | ex->ignore_case = -1; /* unset */
|
---|
4602 | ex->len = 0;
|
---|
4603 | ex->s[0] = '\0';
|
---|
4604 | }
|
---|
4605 |
|
---|
4606 | static void
|
---|
4607 | copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
|
---|
4608 | {
|
---|
4609 | *to = *from;
|
---|
4610 | }
|
---|
4611 |
|
---|
4612 | static void
|
---|
4613 | concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
|
---|
4614 | {
|
---|
4615 | int i, j, len;
|
---|
4616 | UChar *p, *end;
|
---|
4617 | OptAncInfo tanc;
|
---|
4618 |
|
---|
4619 | if (to->ignore_case < 0)
|
---|
4620 | to->ignore_case = add->ignore_case;
|
---|
4621 | else if (to->ignore_case != add->ignore_case)
|
---|
4622 | return ; /* avoid */
|
---|
4623 |
|
---|
4624 | p = add->s;
|
---|
4625 | end = p + add->len;
|
---|
4626 | for (i = to->len; p < end; ) {
|
---|
4627 | len = enclen(enc, p, end);
|
---|
4628 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
4629 | for (j = 0; j < len && p < end; j++)
|
---|
4630 | to->s[i++] = *p++;
|
---|
4631 | }
|
---|
4632 |
|
---|
4633 | to->len = i;
|
---|
4634 | to->reach_end = (p == end ? add->reach_end : 0);
|
---|
4635 |
|
---|
4636 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
|
---|
4637 | if (! to->reach_end) tanc.right_anchor = 0;
|
---|
4638 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
4639 | }
|
---|
4640 |
|
---|
4641 | static void
|
---|
4642 | concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
|
---|
4643 | int raw ARG_UNUSED, OnigEncoding enc)
|
---|
4644 | {
|
---|
4645 | int i, j, len;
|
---|
4646 | UChar *p;
|
---|
4647 |
|
---|
4648 | for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
|
---|
4649 | len = enclen(enc, p, end);
|
---|
4650 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
4651 | for (j = 0; j < len && p < end; j++)
|
---|
4652 | to->s[i++] = *p++;
|
---|
4653 | }
|
---|
4654 |
|
---|
4655 | to->len = i;
|
---|
4656 | }
|
---|
4657 |
|
---|
4658 | static void
|
---|
4659 | alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
|
---|
4660 | {
|
---|
4661 | int i, j, len;
|
---|
4662 |
|
---|
4663 | if (add->len == 0 || to->len == 0) {
|
---|
4664 | clear_opt_exact_info(to);
|
---|
4665 | return ;
|
---|
4666 | }
|
---|
4667 |
|
---|
4668 | if (! is_equal_mml(&to->mmd, &add->mmd)) {
|
---|
4669 | clear_opt_exact_info(to);
|
---|
4670 | return ;
|
---|
4671 | }
|
---|
4672 |
|
---|
4673 | for (i = 0; i < to->len && i < add->len; ) {
|
---|
4674 | if (to->s[i] != add->s[i]) break;
|
---|
4675 | len = enclen(env->enc, to->s + i, to->s + to->len);
|
---|
4676 |
|
---|
4677 | for (j = 1; j < len; j++) {
|
---|
4678 | if (to->s[i+j] != add->s[i+j]) break;
|
---|
4679 | }
|
---|
4680 | if (j < len) break;
|
---|
4681 | i += len;
|
---|
4682 | }
|
---|
4683 |
|
---|
4684 | if (! add->reach_end || i < add->len || i < to->len) {
|
---|
4685 | to->reach_end = 0;
|
---|
4686 | }
|
---|
4687 | to->len = i;
|
---|
4688 | if (to->ignore_case < 0)
|
---|
4689 | to->ignore_case = add->ignore_case;
|
---|
4690 | else if (add->ignore_case >= 0)
|
---|
4691 | to->ignore_case |= add->ignore_case;
|
---|
4692 |
|
---|
4693 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
4694 | if (! to->reach_end) to->anc.right_anchor = 0;
|
---|
4695 | }
|
---|
4696 |
|
---|
4697 | static void
|
---|
4698 | select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
|
---|
4699 | {
|
---|
4700 | int v1, v2;
|
---|
4701 |
|
---|
4702 | v1 = now->len;
|
---|
4703 | v2 = alt->len;
|
---|
4704 |
|
---|
4705 | if (v2 == 0) {
|
---|
4706 | return ;
|
---|
4707 | }
|
---|
4708 | else if (v1 == 0) {
|
---|
4709 | copy_opt_exact_info(now, alt);
|
---|
4710 | return ;
|
---|
4711 | }
|
---|
4712 | else if (v1 <= 2 && v2 <= 2) {
|
---|
4713 | /* ByteValTable[x] is big value --> low price */
|
---|
4714 | v2 = map_position_value(enc, now->s[0]);
|
---|
4715 | v1 = map_position_value(enc, alt->s[0]);
|
---|
4716 |
|
---|
4717 | if (now->len > 1) v1 += 5;
|
---|
4718 | if (alt->len > 1) v2 += 5;
|
---|
4719 | }
|
---|
4720 |
|
---|
4721 | if (now->ignore_case <= 0) v1 *= 2;
|
---|
4722 | if (alt->ignore_case <= 0) v2 *= 2;
|
---|
4723 |
|
---|
4724 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
4725 | copy_opt_exact_info(now, alt);
|
---|
4726 | }
|
---|
4727 |
|
---|
4728 | static void
|
---|
4729 | clear_opt_map_info(OptMapInfo* map)
|
---|
4730 | {
|
---|
4731 | static const OptMapInfo clean_info = {
|
---|
4732 | {0, 0}, {0, 0}, 0,
|
---|
4733 | {
|
---|
4734 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4735 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4736 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4737 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4738 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4739 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4740 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4741 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4742 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4743 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4744 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4745 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4746 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4747 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4748 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4749 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
---|
4750 | }
|
---|
4751 | };
|
---|
4752 |
|
---|
4753 | xmemcpy(map, &clean_info, sizeof(OptMapInfo));
|
---|
4754 | }
|
---|
4755 |
|
---|
4756 | static void
|
---|
4757 | copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
|
---|
4758 | {
|
---|
4759 | *to = *from;
|
---|
4760 | }
|
---|
4761 |
|
---|
4762 | static void
|
---|
4763 | add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
|
---|
4764 | {
|
---|
4765 | if (map->map[c] == 0) {
|
---|
4766 | map->map[c] = 1;
|
---|
4767 | map->value += map_position_value(enc, c);
|
---|
4768 | }
|
---|
4769 | }
|
---|
4770 |
|
---|
4771 | static int
|
---|
4772 | add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
---|
4773 | OnigEncoding enc, OnigCaseFoldType case_fold_flag)
|
---|
4774 | {
|
---|
4775 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
4776 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
---|
4777 | int i, n;
|
---|
4778 |
|
---|
4779 | add_char_opt_map_info(map, p[0], enc);
|
---|
4780 |
|
---|
4781 | case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
|
---|
4782 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
|
---|
4783 | if (n < 0) return n;
|
---|
4784 |
|
---|
4785 | for (i = 0; i < n; i++) {
|
---|
4786 | ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
|
---|
4787 | add_char_opt_map_info(map, buf[0], enc);
|
---|
4788 | }
|
---|
4789 |
|
---|
4790 | return 0;
|
---|
4791 | }
|
---|
4792 |
|
---|
4793 | static void
|
---|
4794 | select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
|
---|
4795 | {
|
---|
4796 | const int z = 1<<15; /* 32768: something big value */
|
---|
4797 |
|
---|
4798 | int v1, v2;
|
---|
4799 |
|
---|
4800 | if (alt->value == 0) return ;
|
---|
4801 | if (now->value == 0) {
|
---|
4802 | copy_opt_map_info(now, alt);
|
---|
4803 | return ;
|
---|
4804 | }
|
---|
4805 |
|
---|
4806 | v1 = z / now->value;
|
---|
4807 | v2 = z / alt->value;
|
---|
4808 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
4809 | copy_opt_map_info(now, alt);
|
---|
4810 | }
|
---|
4811 |
|
---|
4812 | static int
|
---|
4813 | comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
|
---|
4814 | {
|
---|
4815 | #define COMP_EM_BASE 20
|
---|
4816 | int ve, vm;
|
---|
4817 |
|
---|
4818 | if (m->value <= 0) return -1;
|
---|
4819 |
|
---|
4820 | ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
|
---|
4821 | vm = COMP_EM_BASE * 5 * 2 / m->value;
|
---|
4822 | return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
|
---|
4823 | }
|
---|
4824 |
|
---|
4825 | static void
|
---|
4826 | alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
|
---|
4827 | {
|
---|
4828 | int i, val;
|
---|
4829 |
|
---|
4830 | /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
|
---|
4831 | if (to->value == 0) return ;
|
---|
4832 | if (add->value == 0 || to->mmd.max < add->mmd.min) {
|
---|
4833 | clear_opt_map_info(to);
|
---|
4834 | return ;
|
---|
4835 | }
|
---|
4836 |
|
---|
4837 | alt_merge_mml(&to->mmd, &add->mmd);
|
---|
4838 |
|
---|
4839 | val = 0;
|
---|
4840 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
4841 | if (add->map[i])
|
---|
4842 | to->map[i] = 1;
|
---|
4843 |
|
---|
4844 | if (to->map[i])
|
---|
4845 | val += map_position_value(enc, i);
|
---|
4846 | }
|
---|
4847 | to->value = val;
|
---|
4848 |
|
---|
4849 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
4850 | }
|
---|
4851 |
|
---|
4852 | static void
|
---|
4853 | set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
|
---|
4854 | {
|
---|
4855 | copy_mml(&(opt->exb.mmd), mmd);
|
---|
4856 | copy_mml(&(opt->expr.mmd), mmd);
|
---|
4857 | copy_mml(&(opt->map.mmd), mmd);
|
---|
4858 | }
|
---|
4859 |
|
---|
4860 | static void
|
---|
4861 | clear_node_opt_info(NodeOptInfo* opt)
|
---|
4862 | {
|
---|
4863 | clear_mml(&opt->len);
|
---|
4864 | clear_opt_anc_info(&opt->anc);
|
---|
4865 | clear_opt_exact_info(&opt->exb);
|
---|
4866 | clear_opt_exact_info(&opt->exm);
|
---|
4867 | clear_opt_exact_info(&opt->expr);
|
---|
4868 | clear_opt_map_info(&opt->map);
|
---|
4869 | }
|
---|
4870 |
|
---|
4871 | static void
|
---|
4872 | copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
|
---|
4873 | {
|
---|
4874 | *to = *from;
|
---|
4875 | }
|
---|
4876 |
|
---|
4877 | static void
|
---|
4878 | concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
|
---|
4879 | {
|
---|
4880 | int exb_reach, exm_reach;
|
---|
4881 | OptAncInfo tanc;
|
---|
4882 |
|
---|
4883 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
|
---|
4884 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
4885 |
|
---|
4886 | if (add->exb.len > 0 && to->len.max == 0) {
|
---|
4887 | concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
|
---|
4888 | to->len.max, add->len.max);
|
---|
4889 | copy_opt_anc_info(&add->exb.anc, &tanc);
|
---|
4890 | }
|
---|
4891 |
|
---|
4892 | if (add->map.value > 0 && to->len.max == 0) {
|
---|
4893 | if (add->map.mmd.max == 0)
|
---|
4894 | add->map.anc.left_anchor |= to->anc.left_anchor;
|
---|
4895 | }
|
---|
4896 |
|
---|
4897 | exb_reach = to->exb.reach_end;
|
---|
4898 | exm_reach = to->exm.reach_end;
|
---|
4899 |
|
---|
4900 | if (add->len.max != 0)
|
---|
4901 | to->exb.reach_end = to->exm.reach_end = 0;
|
---|
4902 |
|
---|
4903 | if (add->exb.len > 0) {
|
---|
4904 | if (exb_reach) {
|
---|
4905 | concat_opt_exact_info(&to->exb, &add->exb, enc);
|
---|
4906 | clear_opt_exact_info(&add->exb);
|
---|
4907 | }
|
---|
4908 | else if (exm_reach) {
|
---|
4909 | concat_opt_exact_info(&to->exm, &add->exb, enc);
|
---|
4910 | clear_opt_exact_info(&add->exb);
|
---|
4911 | }
|
---|
4912 | }
|
---|
4913 | select_opt_exact_info(enc, &to->exm, &add->exb);
|
---|
4914 | select_opt_exact_info(enc, &to->exm, &add->exm);
|
---|
4915 |
|
---|
4916 | if (to->expr.len > 0) {
|
---|
4917 | if (add->len.max > 0) {
|
---|
4918 | if (to->expr.len > (int )add->len.max)
|
---|
4919 | to->expr.len = (int )add->len.max;
|
---|
4920 |
|
---|
4921 | if (to->expr.mmd.max == 0)
|
---|
4922 | select_opt_exact_info(enc, &to->exb, &to->expr);
|
---|
4923 | else
|
---|
4924 | select_opt_exact_info(enc, &to->exm, &to->expr);
|
---|
4925 | }
|
---|
4926 | }
|
---|
4927 | else if (add->expr.len > 0) {
|
---|
4928 | copy_opt_exact_info(&to->expr, &add->expr);
|
---|
4929 | }
|
---|
4930 |
|
---|
4931 | select_opt_map_info(&to->map, &add->map);
|
---|
4932 |
|
---|
4933 | add_mml(&to->len, &add->len);
|
---|
4934 | }
|
---|
4935 |
|
---|
4936 | static void
|
---|
4937 | alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
|
---|
4938 | {
|
---|
4939 | alt_merge_opt_anc_info (&to->anc, &add->anc);
|
---|
4940 | alt_merge_opt_exact_info(&to->exb, &add->exb, env);
|
---|
4941 | alt_merge_opt_exact_info(&to->exm, &add->exm, env);
|
---|
4942 | alt_merge_opt_exact_info(&to->expr, &add->expr, env);
|
---|
4943 | alt_merge_opt_map_info(env->enc, &to->map, &add->map);
|
---|
4944 |
|
---|
4945 | alt_merge_mml(&to->len, &add->len);
|
---|
4946 | }
|
---|
4947 |
|
---|
4948 |
|
---|
4949 | #define MAX_NODE_OPT_INFO_REF_COUNT 5
|
---|
4950 |
|
---|
4951 | static int
|
---|
4952 | optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
---|
4953 | {
|
---|
4954 | int type;
|
---|
4955 | int r = 0;
|
---|
4956 |
|
---|
4957 | clear_node_opt_info(opt);
|
---|
4958 | set_bound_node_opt_info(opt, &env->mmd);
|
---|
4959 |
|
---|
4960 | type = NTYPE(node);
|
---|
4961 | switch (type) {
|
---|
4962 | case NT_LIST:
|
---|
4963 | {
|
---|
4964 | OptEnv nenv;
|
---|
4965 | NodeOptInfo nopt;
|
---|
4966 | Node* nd = node;
|
---|
4967 |
|
---|
4968 | copy_opt_env(&nenv, env);
|
---|
4969 | do {
|
---|
4970 | r = optimize_node_left(NCAR(nd), &nopt, &nenv);
|
---|
4971 | if (r == 0) {
|
---|
4972 | add_mml(&nenv.mmd, &nopt.len);
|
---|
4973 | concat_left_node_opt_info(env->enc, opt, &nopt);
|
---|
4974 | }
|
---|
4975 | } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
4976 | }
|
---|
4977 | break;
|
---|
4978 |
|
---|
4979 | case NT_ALT:
|
---|
4980 | {
|
---|
4981 | NodeOptInfo nopt;
|
---|
4982 | Node* nd = node;
|
---|
4983 |
|
---|
4984 | do {
|
---|
4985 | r = optimize_node_left(NCAR(nd), &nopt, env);
|
---|
4986 | if (r == 0) {
|
---|
4987 | if (nd == node) copy_node_opt_info(opt, &nopt);
|
---|
4988 | else alt_merge_node_opt_info(opt, &nopt, env);
|
---|
4989 | }
|
---|
4990 | } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
4991 | }
|
---|
4992 | break;
|
---|
4993 |
|
---|
4994 | case NT_STR:
|
---|
4995 | {
|
---|
4996 | StrNode* sn = NSTR(node);
|
---|
4997 | OnigDistance slen = sn->end - sn->s;
|
---|
4998 | int is_raw = NSTRING_IS_RAW(node);
|
---|
4999 |
|
---|
5000 | if (! NSTRING_IS_AMBIG(node)) {
|
---|
5001 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
5002 | is_raw, env->enc);
|
---|
5003 | opt->exb.ignore_case = 0;
|
---|
5004 | if (slen > 0) {
|
---|
5005 | add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
|
---|
5006 | }
|
---|
5007 | set_mml(&opt->len, slen, slen);
|
---|
5008 | }
|
---|
5009 | else {
|
---|
5010 | OnigDistance max;
|
---|
5011 |
|
---|
5012 | if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
|
---|
5013 | int n = onigenc_strlen(env->enc, sn->s, sn->end);
|
---|
5014 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
|
---|
5015 | }
|
---|
5016 | else {
|
---|
5017 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
5018 | is_raw, env->enc);
|
---|
5019 | opt->exb.ignore_case = 1;
|
---|
5020 |
|
---|
5021 | if (slen > 0) {
|
---|
5022 | r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
|
---|
5023 | env->enc, env->case_fold_flag);
|
---|
5024 | if (r != 0) break;
|
---|
5025 | }
|
---|
5026 |
|
---|
5027 | max = slen;
|
---|
5028 | }
|
---|
5029 |
|
---|
5030 | set_mml(&opt->len, slen, max);
|
---|
5031 | }
|
---|
5032 |
|
---|
5033 | if ((OnigDistance )opt->exb.len == slen)
|
---|
5034 | opt->exb.reach_end = 1;
|
---|
5035 | }
|
---|
5036 | break;
|
---|
5037 |
|
---|
5038 | case NT_CCLASS:
|
---|
5039 | {
|
---|
5040 | int i, z;
|
---|
5041 | CClassNode* cc = NCCLASS(node);
|
---|
5042 |
|
---|
5043 | /* no need to check ignore case. (set in setup_tree()) */
|
---|
5044 |
|
---|
5045 | if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
|
---|
5046 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
5047 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
5048 |
|
---|
5049 | set_mml(&opt->len, min, max);
|
---|
5050 | }
|
---|
5051 | else {
|
---|
5052 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
5053 | z = BITSET_AT(cc->bs, i);
|
---|
5054 | if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
|
---|
5055 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
5056 | }
|
---|
5057 | }
|
---|
5058 | set_mml(&opt->len, 1, 1);
|
---|
5059 | }
|
---|
5060 | }
|
---|
5061 | break;
|
---|
5062 |
|
---|
5063 | case NT_CTYPE:
|
---|
5064 | {
|
---|
5065 | int i, min, max;
|
---|
5066 | int maxcode;
|
---|
5067 |
|
---|
5068 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
5069 |
|
---|
5070 | if (max == 1) {
|
---|
5071 | min = 1;
|
---|
5072 |
|
---|
5073 | maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
|
---|
5074 | switch (NCTYPE(node)->ctype) {
|
---|
5075 | case ONIGENC_CTYPE_WORD:
|
---|
5076 | if (NCTYPE(node)->not != 0) {
|
---|
5077 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
5078 | if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
|
---|
5079 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
5080 | }
|
---|
5081 | }
|
---|
5082 | }
|
---|
5083 | else {
|
---|
5084 | for (i = 0; i < maxcode; i++) {
|
---|
5085 | if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
|
---|
5086 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
5087 | }
|
---|
5088 | }
|
---|
5089 | }
|
---|
5090 | break;
|
---|
5091 | }
|
---|
5092 | }
|
---|
5093 | else {
|
---|
5094 | min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
5095 | }
|
---|
5096 | set_mml(&opt->len, min, max);
|
---|
5097 | }
|
---|
5098 | break;
|
---|
5099 |
|
---|
5100 | case NT_CANY:
|
---|
5101 | {
|
---|
5102 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
5103 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
5104 | set_mml(&opt->len, min, max);
|
---|
5105 | }
|
---|
5106 | break;
|
---|
5107 |
|
---|
5108 | case NT_ANCHOR:
|
---|
5109 | switch (NANCHOR(node)->type) {
|
---|
5110 | case ANCHOR_BEGIN_BUF:
|
---|
5111 | case ANCHOR_BEGIN_POSITION:
|
---|
5112 | case ANCHOR_BEGIN_LINE:
|
---|
5113 | case ANCHOR_END_BUF:
|
---|
5114 | case ANCHOR_SEMI_END_BUF:
|
---|
5115 | case ANCHOR_END_LINE:
|
---|
5116 | case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
|
---|
5117 | case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
|
---|
5118 | add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
|
---|
5119 | break;
|
---|
5120 |
|
---|
5121 | case ANCHOR_PREC_READ:
|
---|
5122 | {
|
---|
5123 | NodeOptInfo nopt;
|
---|
5124 |
|
---|
5125 | r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
|
---|
5126 | if (r == 0) {
|
---|
5127 | if (nopt.exb.len > 0)
|
---|
5128 | copy_opt_exact_info(&opt->expr, &nopt.exb);
|
---|
5129 | else if (nopt.exm.len > 0)
|
---|
5130 | copy_opt_exact_info(&opt->expr, &nopt.exm);
|
---|
5131 |
|
---|
5132 | opt->expr.reach_end = 0;
|
---|
5133 |
|
---|
5134 | if (nopt.map.value > 0)
|
---|
5135 | copy_opt_map_info(&opt->map, &nopt.map);
|
---|
5136 | }
|
---|
5137 | }
|
---|
5138 | break;
|
---|
5139 |
|
---|
5140 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
5141 | break;
|
---|
5142 | }
|
---|
5143 | break;
|
---|
5144 |
|
---|
5145 | case NT_BREF:
|
---|
5146 | {
|
---|
5147 | int i;
|
---|
5148 | int* backs;
|
---|
5149 | OnigDistance min, max, tmin, tmax;
|
---|
5150 | Node** nodes = SCANENV_MEM_NODES(env->scan_env);
|
---|
5151 | BRefNode* br = NBREF(node);
|
---|
5152 |
|
---|
5153 | if (br->state & NST_RECURSION) {
|
---|
5154 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
5155 | break;
|
---|
5156 | }
|
---|
5157 | backs = BACKREFS_P(br);
|
---|
5158 | r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
|
---|
5159 | if (r != 0) break;
|
---|
5160 | r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
|
---|
5161 | if (r != 0) break;
|
---|
5162 | for (i = 1; i < br->back_num; i++) {
|
---|
5163 | r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
|
---|
5164 | if (r != 0) break;
|
---|
5165 | r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
|
---|
5166 | if (r != 0) break;
|
---|
5167 | if (min > tmin) min = tmin;
|
---|
5168 | if (max < tmax) max = tmax;
|
---|
5169 | }
|
---|
5170 | if (r == 0) set_mml(&opt->len, min, max);
|
---|
5171 | }
|
---|
5172 | break;
|
---|
5173 |
|
---|
5174 | #ifdef USE_SUBEXP_CALL
|
---|
5175 | case NT_CALL:
|
---|
5176 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
5177 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
5178 | else {
|
---|
5179 | OnigOptionType save = env->options;
|
---|
5180 | env->options = NENCLOSE(NCALL(node)->target)->option;
|
---|
5181 | r = optimize_node_left(NCALL(node)->target, opt, env);
|
---|
5182 | env->options = save;
|
---|
5183 | }
|
---|
5184 | break;
|
---|
5185 | #endif
|
---|
5186 |
|
---|
5187 | case NT_QTFR:
|
---|
5188 | {
|
---|
5189 | int i;
|
---|
5190 | OnigDistance min, max;
|
---|
5191 | NodeOptInfo nopt;
|
---|
5192 | QtfrNode* qn = NQTFR(node);
|
---|
5193 |
|
---|
5194 | r = optimize_node_left(qn->target, &nopt, env);
|
---|
5195 | if (r) break;
|
---|
5196 |
|
---|
5197 | if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
|
---|
5198 | if (env->mmd.max == 0 &&
|
---|
5199 | NTYPE(qn->target) == NT_CANY && qn->greedy) {
|
---|
5200 | if (IS_MULTILINE(env->options))
|
---|
5201 | /* implicit anchor: /.*a/ ==> /\A.*a/ */
|
---|
5202 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
|
---|
5203 | else
|
---|
5204 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
|
---|
5205 | }
|
---|
5206 | }
|
---|
5207 | else {
|
---|
5208 | if (qn->lower > 0) {
|
---|
5209 | copy_node_opt_info(opt, &nopt);
|
---|
5210 | if (nopt.exb.len > 0) {
|
---|
5211 | if (nopt.exb.reach_end) {
|
---|
5212 | for (i = 2; i <= qn->lower &&
|
---|
5213 | ! is_full_opt_exact_info(&opt->exb); i++) {
|
---|
5214 | concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
|
---|
5215 | }
|
---|
5216 | if (i < qn->lower) {
|
---|
5217 | opt->exb.reach_end = 0;
|
---|
5218 | }
|
---|
5219 | }
|
---|
5220 | }
|
---|
5221 |
|
---|
5222 | if (qn->lower != qn->upper) {
|
---|
5223 | opt->exb.reach_end = 0;
|
---|
5224 | opt->exm.reach_end = 0;
|
---|
5225 | }
|
---|
5226 | if (qn->lower > 1)
|
---|
5227 | opt->exm.reach_end = 0;
|
---|
5228 | }
|
---|
5229 | }
|
---|
5230 |
|
---|
5231 | min = distance_multiply(nopt.len.min, qn->lower);
|
---|
5232 | if (IS_REPEAT_INFINITE(qn->upper))
|
---|
5233 | max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
|
---|
5234 | else
|
---|
5235 | max = distance_multiply(nopt.len.max, qn->upper);
|
---|
5236 |
|
---|
5237 | set_mml(&opt->len, min, max);
|
---|
5238 | }
|
---|
5239 | break;
|
---|
5240 |
|
---|
5241 | case NT_ENCLOSE:
|
---|
5242 | {
|
---|
5243 | EncloseNode* en = NENCLOSE(node);
|
---|
5244 |
|
---|
5245 | switch (en->type) {
|
---|
5246 | case ENCLOSE_OPTION:
|
---|
5247 | {
|
---|
5248 | OnigOptionType save = env->options;
|
---|
5249 |
|
---|
5250 | env->options = en->option;
|
---|
5251 | r = optimize_node_left(en->target, opt, env);
|
---|
5252 | env->options = save;
|
---|
5253 | }
|
---|
5254 | break;
|
---|
5255 |
|
---|
5256 | case ENCLOSE_MEMORY:
|
---|
5257 | #ifdef USE_SUBEXP_CALL
|
---|
5258 | en->opt_count++;
|
---|
5259 | if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
|
---|
5260 | OnigDistance min, max;
|
---|
5261 |
|
---|
5262 | min = 0;
|
---|
5263 | max = ONIG_INFINITE_DISTANCE;
|
---|
5264 | if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
|
---|
5265 | if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
|
---|
5266 | set_mml(&opt->len, min, max);
|
---|
5267 | }
|
---|
5268 | else
|
---|
5269 | #endif
|
---|
5270 | {
|
---|
5271 | r = optimize_node_left(en->target, opt, env);
|
---|
5272 |
|
---|
5273 | if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
|
---|
5274 | if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
|
---|
5275 | remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
|
---|
5276 | }
|
---|
5277 | }
|
---|
5278 | break;
|
---|
5279 |
|
---|
5280 | case ENCLOSE_STOP_BACKTRACK:
|
---|
5281 | case ENCLOSE_CONDITION:
|
---|
5282 | r = optimize_node_left(en->target, opt, env);
|
---|
5283 | break;
|
---|
5284 |
|
---|
5285 | case ENCLOSE_ABSENT:
|
---|
5286 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
5287 | break;
|
---|
5288 | }
|
---|
5289 | }
|
---|
5290 | break;
|
---|
5291 |
|
---|
5292 | default:
|
---|
5293 | #ifdef ONIG_DEBUG
|
---|
5294 | fprintf(stderr, "optimize_node_left: undefined node type %d\n",
|
---|
5295 | NTYPE(node));
|
---|
5296 | #endif
|
---|
5297 | r = ONIGERR_TYPE_BUG;
|
---|
5298 | break;
|
---|
5299 | }
|
---|
5300 |
|
---|
5301 | return r;
|
---|
5302 | }
|
---|
5303 |
|
---|
5304 | static int
|
---|
5305 | set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
|
---|
5306 | {
|
---|
5307 | int r;
|
---|
5308 | int allow_reverse;
|
---|
5309 |
|
---|
5310 | if (e->len == 0) return 0;
|
---|
5311 |
|
---|
5312 | reg->exact = (UChar* )xmalloc(e->len);
|
---|
5313 | CHECK_NULL_RETURN_MEMERR(reg->exact);
|
---|
5314 | xmemcpy(reg->exact, e->s, e->len);
|
---|
5315 | reg->exact_end = reg->exact + e->len;
|
---|
5316 |
|
---|
5317 | allow_reverse =
|
---|
5318 | ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
|
---|
5319 |
|
---|
5320 | if (e->ignore_case > 0) {
|
---|
5321 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
5322 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
5323 | reg->map, &(reg->int_map), 1);
|
---|
5324 | if (r == 0) {
|
---|
5325 | reg->optimize = (allow_reverse != 0
|
---|
5326 | ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
|
---|
5327 | }
|
---|
5328 | else {
|
---|
5329 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
5330 | }
|
---|
5331 | }
|
---|
5332 | else {
|
---|
5333 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
5334 | }
|
---|
5335 | }
|
---|
5336 | else {
|
---|
5337 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
5338 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
5339 | reg->map, &(reg->int_map), 0);
|
---|
5340 | if (r == 0) {
|
---|
5341 | reg->optimize = (allow_reverse != 0
|
---|
5342 | ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
|
---|
5343 | }
|
---|
5344 | else {
|
---|
5345 | reg->optimize = ONIG_OPTIMIZE_EXACT;
|
---|
5346 | }
|
---|
5347 | }
|
---|
5348 | else {
|
---|
5349 | reg->optimize = ONIG_OPTIMIZE_EXACT;
|
---|
5350 | }
|
---|
5351 | }
|
---|
5352 |
|
---|
5353 | reg->dmin = e->mmd.min;
|
---|
5354 | reg->dmax = e->mmd.max;
|
---|
5355 |
|
---|
5356 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
5357 | reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
|
---|
5358 | }
|
---|
5359 |
|
---|
5360 | return 0;
|
---|
5361 | }
|
---|
5362 |
|
---|
5363 | static void
|
---|
5364 | set_optimize_map_info(regex_t* reg, OptMapInfo* m)
|
---|
5365 | {
|
---|
5366 | int i;
|
---|
5367 |
|
---|
5368 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
5369 | reg->map[i] = m->map[i];
|
---|
5370 |
|
---|
5371 | reg->optimize = ONIG_OPTIMIZE_MAP;
|
---|
5372 | reg->dmin = m->mmd.min;
|
---|
5373 | reg->dmax = m->mmd.max;
|
---|
5374 |
|
---|
5375 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
5376 | reg->threshold_len = (int )(reg->dmin + 1);
|
---|
5377 | }
|
---|
5378 | }
|
---|
5379 |
|
---|
5380 | static void
|
---|
5381 | set_sub_anchor(regex_t* reg, OptAncInfo* anc)
|
---|
5382 | {
|
---|
5383 | reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
|
---|
5384 | reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
|
---|
5385 | }
|
---|
5386 |
|
---|
5387 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
5388 | static void print_optimize_info(FILE* f, regex_t* reg);
|
---|
5389 | #endif
|
---|
5390 |
|
---|
5391 | static int
|
---|
5392 | set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
|
---|
5393 | {
|
---|
5394 |
|
---|
5395 | int r;
|
---|
5396 | NodeOptInfo opt;
|
---|
5397 | OptEnv env;
|
---|
5398 |
|
---|
5399 | env.enc = reg->enc;
|
---|
5400 | env.options = reg->options;
|
---|
5401 | env.case_fold_flag = reg->case_fold_flag;
|
---|
5402 | env.scan_env = scan_env;
|
---|
5403 | clear_mml(&env.mmd);
|
---|
5404 |
|
---|
5405 | r = optimize_node_left(node, &opt, &env);
|
---|
5406 | if (r) return r;
|
---|
5407 |
|
---|
5408 | reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
|
---|
5409 | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
|
---|
5410 | ANCHOR_LOOK_BEHIND);
|
---|
5411 |
|
---|
5412 | if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
|
---|
5413 | reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
|
---|
5414 |
|
---|
5415 | reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
|
---|
5416 | ANCHOR_PREC_READ_NOT);
|
---|
5417 |
|
---|
5418 | if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
|
---|
5419 | reg->anchor_dmin = opt.len.min;
|
---|
5420 | reg->anchor_dmax = opt.len.max;
|
---|
5421 | }
|
---|
5422 |
|
---|
5423 | if (opt.exb.len > 0 || opt.exm.len > 0) {
|
---|
5424 | select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
|
---|
5425 | if (opt.map.value > 0 &&
|
---|
5426 | comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
|
---|
5427 | goto set_map;
|
---|
5428 | }
|
---|
5429 | else {
|
---|
5430 | r = set_optimize_exact_info(reg, &opt.exb);
|
---|
5431 | set_sub_anchor(reg, &opt.exb.anc);
|
---|
5432 | }
|
---|
5433 | }
|
---|
5434 | else if (opt.map.value > 0) {
|
---|
5435 | set_map:
|
---|
5436 | set_optimize_map_info(reg, &opt.map);
|
---|
5437 | set_sub_anchor(reg, &opt.map.anc);
|
---|
5438 | }
|
---|
5439 | else {
|
---|
5440 | reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
|
---|
5441 | if (opt.len.max == 0)
|
---|
5442 | reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
|
---|
5443 | }
|
---|
5444 |
|
---|
5445 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
5446 | print_optimize_info(stderr, reg);
|
---|
5447 | #endif
|
---|
5448 | return r;
|
---|
5449 | }
|
---|
5450 |
|
---|
5451 | static void
|
---|
5452 | clear_optimize_info(regex_t* reg)
|
---|
5453 | {
|
---|
5454 | reg->optimize = ONIG_OPTIMIZE_NONE;
|
---|
5455 | reg->anchor = 0;
|
---|
5456 | reg->anchor_dmin = 0;
|
---|
5457 | reg->anchor_dmax = 0;
|
---|
5458 | reg->sub_anchor = 0;
|
---|
5459 | reg->exact_end = (UChar* )NULL;
|
---|
5460 | reg->threshold_len = 0;
|
---|
5461 | if (IS_NOT_NULL(reg->exact)) {
|
---|
5462 | xfree(reg->exact);
|
---|
5463 | reg->exact = (UChar* )NULL;
|
---|
5464 | }
|
---|
5465 | }
|
---|
5466 |
|
---|
5467 | #ifdef ONIG_DEBUG
|
---|
5468 |
|
---|
5469 | static void print_enc_string(FILE* fp, OnigEncoding enc,
|
---|
5470 | const UChar *s, const UChar *end)
|
---|
5471 | {
|
---|
5472 | fprintf(fp, "\nPATTERN: /");
|
---|
5473 |
|
---|
5474 | if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
---|
5475 | const UChar *p;
|
---|
5476 | OnigCodePoint code;
|
---|
5477 |
|
---|
5478 | p = s;
|
---|
5479 | while (p < end) {
|
---|
5480 | code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
---|
5481 | if (code >= 0x80) {
|
---|
5482 | fprintf(fp, " 0x%04x ", (int )code);
|
---|
5483 | }
|
---|
5484 | else {
|
---|
5485 | fputc((int )code, fp);
|
---|
5486 | }
|
---|
5487 |
|
---|
5488 | p += enclen(enc, p, end);
|
---|
5489 | }
|
---|
5490 | }
|
---|
5491 | else {
|
---|
5492 | while (s < end) {
|
---|
5493 | fputc((int )*s, fp);
|
---|
5494 | s++;
|
---|
5495 | }
|
---|
5496 | }
|
---|
5497 |
|
---|
5498 | fprintf(fp, "/ (%s)\n", enc->name);
|
---|
5499 | }
|
---|
5500 | #endif /* ONIG_DEBUG */
|
---|
5501 |
|
---|
5502 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
5503 | static void
|
---|
5504 | print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
|
---|
5505 | {
|
---|
5506 | if (a == ONIG_INFINITE_DISTANCE)
|
---|
5507 | fputs("inf", f);
|
---|
5508 | else
|
---|
5509 | fprintf(f, "(%"PRIuPTR")", a);
|
---|
5510 |
|
---|
5511 | fputs("-", f);
|
---|
5512 |
|
---|
5513 | if (b == ONIG_INFINITE_DISTANCE)
|
---|
5514 | fputs("inf", f);
|
---|
5515 | else
|
---|
5516 | fprintf(f, "(%"PRIuPTR")", b);
|
---|
5517 | }
|
---|
5518 |
|
---|
5519 | static void
|
---|
5520 | print_anchor(FILE* f, int anchor)
|
---|
5521 | {
|
---|
5522 | int q = 0;
|
---|
5523 |
|
---|
5524 | fprintf(f, "[");
|
---|
5525 |
|
---|
5526 | if (anchor & ANCHOR_BEGIN_BUF) {
|
---|
5527 | fprintf(f, "begin-buf");
|
---|
5528 | q = 1;
|
---|
5529 | }
|
---|
5530 | if (anchor & ANCHOR_BEGIN_LINE) {
|
---|
5531 | if (q) fprintf(f, ", ");
|
---|
5532 | q = 1;
|
---|
5533 | fprintf(f, "begin-line");
|
---|
5534 | }
|
---|
5535 | if (anchor & ANCHOR_BEGIN_POSITION) {
|
---|
5536 | if (q) fprintf(f, ", ");
|
---|
5537 | q = 1;
|
---|
5538 | fprintf(f, "begin-pos");
|
---|
5539 | }
|
---|
5540 | if (anchor & ANCHOR_END_BUF) {
|
---|
5541 | if (q) fprintf(f, ", ");
|
---|
5542 | q = 1;
|
---|
5543 | fprintf(f, "end-buf");
|
---|
5544 | }
|
---|
5545 | if (anchor & ANCHOR_SEMI_END_BUF) {
|
---|
5546 | if (q) fprintf(f, ", ");
|
---|
5547 | q = 1;
|
---|
5548 | fprintf(f, "semi-end-buf");
|
---|
5549 | }
|
---|
5550 | if (anchor & ANCHOR_END_LINE) {
|
---|
5551 | if (q) fprintf(f, ", ");
|
---|
5552 | q = 1;
|
---|
5553 | fprintf(f, "end-line");
|
---|
5554 | }
|
---|
5555 | if (anchor & ANCHOR_ANYCHAR_STAR) {
|
---|
5556 | if (q) fprintf(f, ", ");
|
---|
5557 | q = 1;
|
---|
5558 | fprintf(f, "anychar-star");
|
---|
5559 | }
|
---|
5560 | if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
|
---|
5561 | if (q) fprintf(f, ", ");
|
---|
5562 | fprintf(f, "anychar-star-ml");
|
---|
5563 | }
|
---|
5564 |
|
---|
5565 | fprintf(f, "]");
|
---|
5566 | }
|
---|
5567 |
|
---|
5568 | static void
|
---|
5569 | print_optimize_info(FILE* f, regex_t* reg)
|
---|
5570 | {
|
---|
5571 | static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
|
---|
5572 | "EXACT_IC", "MAP",
|
---|
5573 | "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
|
---|
5574 |
|
---|
5575 | fprintf(f, "optimize: %s\n", on[reg->optimize]);
|
---|
5576 | fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
|
---|
5577 | if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
|
---|
5578 | print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
|
---|
5579 | fprintf(f, "\n");
|
---|
5580 |
|
---|
5581 | if (reg->optimize) {
|
---|
5582 | fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
|
---|
5583 | fprintf(f, "\n");
|
---|
5584 | }
|
---|
5585 | fprintf(f, "\n");
|
---|
5586 |
|
---|
5587 | if (reg->exact) {
|
---|
5588 | UChar *p;
|
---|
5589 | fprintf(f, "exact: [");
|
---|
5590 | for (p = reg->exact; p < reg->exact_end; p++) {
|
---|
5591 | fputc(*p, f);
|
---|
5592 | }
|
---|
5593 | fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
|
---|
5594 | }
|
---|
5595 | else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
|
---|
5596 | int c, i, n = 0;
|
---|
5597 |
|
---|
5598 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
5599 | if (reg->map[i]) n++;
|
---|
5600 |
|
---|
5601 | fprintf(f, "map: n=%d\n", n);
|
---|
5602 | if (n > 0) {
|
---|
5603 | c = 0;
|
---|
5604 | fputc('[', f);
|
---|
5605 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
5606 | if (reg->map[i] != 0) {
|
---|
5607 | if (c > 0) fputs(", ", f);
|
---|
5608 | c++;
|
---|
5609 | if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
|
---|
5610 | ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
|
---|
5611 | fputc(i, f);
|
---|
5612 | else
|
---|
5613 | fprintf(f, "%d", i);
|
---|
5614 | }
|
---|
5615 | }
|
---|
5616 | fprintf(f, "]\n");
|
---|
5617 | }
|
---|
5618 | }
|
---|
5619 | }
|
---|
5620 | #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
|
---|
5621 |
|
---|
5622 |
|
---|
5623 | extern void
|
---|
5624 | onig_free_body(regex_t* reg)
|
---|
5625 | {
|
---|
5626 | if (IS_NOT_NULL(reg)) {
|
---|
5627 | if (IS_NOT_NULL(reg->p)) xfree(reg->p);
|
---|
5628 | if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
|
---|
5629 | if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
|
---|
5630 | if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
|
---|
5631 | if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
|
---|
5632 | if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
|
---|
5633 |
|
---|
5634 | #ifdef USE_NAMED_GROUP
|
---|
5635 | onig_names_free(reg);
|
---|
5636 | #endif
|
---|
5637 | }
|
---|
5638 | }
|
---|
5639 |
|
---|
5640 | extern void
|
---|
5641 | onig_free(regex_t* reg)
|
---|
5642 | {
|
---|
5643 | if (IS_NOT_NULL(reg)) {
|
---|
5644 | onig_free_body(reg);
|
---|
5645 | xfree(reg);
|
---|
5646 | }
|
---|
5647 | }
|
---|
5648 |
|
---|
5649 | #ifdef RUBY
|
---|
5650 | size_t
|
---|
5651 | onig_memsize(const regex_t *reg)
|
---|
5652 | {
|
---|
5653 | size_t size = sizeof(regex_t);
|
---|
5654 | if (IS_NULL(reg)) return 0;
|
---|
5655 | if (IS_NOT_NULL(reg->p)) size += reg->alloc;
|
---|
5656 | if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
|
---|
5657 | if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
5658 | if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
5659 | if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
|
---|
5660 | if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
|
---|
5661 |
|
---|
5662 | return size;
|
---|
5663 | }
|
---|
5664 |
|
---|
5665 | size_t
|
---|
5666 | onig_region_memsize(const OnigRegion *regs)
|
---|
5667 | {
|
---|
5668 | size_t size = sizeof(*regs);
|
---|
5669 | if (IS_NULL(regs)) return 0;
|
---|
5670 | size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
|
---|
5671 | return size;
|
---|
5672 | }
|
---|
5673 | #endif
|
---|
5674 |
|
---|
5675 | #define REGEX_TRANSFER(to,from) do {\
|
---|
5676 | onig_free_body(to);\
|
---|
5677 | xmemcpy(to, from, sizeof(regex_t));\
|
---|
5678 | xfree(from);\
|
---|
5679 | } while (0)
|
---|
5680 |
|
---|
5681 | #if 0
|
---|
5682 | extern void
|
---|
5683 | onig_transfer(regex_t* to, regex_t* from)
|
---|
5684 | {
|
---|
5685 | REGEX_TRANSFER(to, from);
|
---|
5686 | }
|
---|
5687 | #endif
|
---|
5688 |
|
---|
5689 | #ifdef ONIG_DEBUG_COMPILE
|
---|
5690 | static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
|
---|
5691 | #endif
|
---|
5692 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
5693 | static void print_tree(FILE* f, Node* node);
|
---|
5694 | #endif
|
---|
5695 |
|
---|
5696 | #ifdef RUBY
|
---|
5697 | extern int
|
---|
5698 | onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
5699 | OnigErrorInfo* einfo)
|
---|
5700 | {
|
---|
5701 | return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
|
---|
5702 | }
|
---|
5703 | #endif
|
---|
5704 |
|
---|
5705 | #ifdef RUBY
|
---|
5706 | extern int
|
---|
5707 | onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
5708 | OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
|
---|
5709 | #else
|
---|
5710 | extern int
|
---|
5711 | onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
5712 | OnigErrorInfo* einfo)
|
---|
5713 | #endif
|
---|
5714 | {
|
---|
5715 | #define COMPILE_INIT_SIZE 20
|
---|
5716 |
|
---|
5717 | int r;
|
---|
5718 | OnigDistance init_size;
|
---|
5719 | Node* root;
|
---|
5720 | ScanEnv scan_env = {0};
|
---|
5721 | #ifdef USE_SUBEXP_CALL
|
---|
5722 | UnsetAddrList uslist;
|
---|
5723 | #endif
|
---|
5724 |
|
---|
5725 | if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
|
---|
5726 |
|
---|
5727 | #ifdef RUBY
|
---|
5728 | scan_env.sourcefile = sourcefile;
|
---|
5729 | scan_env.sourceline = sourceline;
|
---|
5730 | #endif
|
---|
5731 |
|
---|
5732 | #ifdef ONIG_DEBUG
|
---|
5733 | print_enc_string(stderr, reg->enc, pattern, pattern_end);
|
---|
5734 | #endif
|
---|
5735 |
|
---|
5736 | if (reg->alloc == 0) {
|
---|
5737 | init_size = (pattern_end - pattern) * 2;
|
---|
5738 | if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
|
---|
5739 | r = BBUF_INIT(reg, init_size);
|
---|
5740 | if (r != 0) goto end;
|
---|
5741 | }
|
---|
5742 | else
|
---|
5743 | reg->used = 0;
|
---|
5744 |
|
---|
5745 | reg->num_mem = 0;
|
---|
5746 | reg->num_repeat = 0;
|
---|
5747 | reg->num_null_check = 0;
|
---|
5748 | reg->repeat_range_alloc = 0;
|
---|
5749 | reg->repeat_range = (OnigRepeatRange* )NULL;
|
---|
5750 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
5751 | reg->num_comb_exp_check = 0;
|
---|
5752 | #endif
|
---|
5753 |
|
---|
5754 | r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
|
---|
5755 | if (r != 0) goto err;
|
---|
5756 |
|
---|
5757 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
5758 | # if 0
|
---|
5759 | fprintf(stderr, "ORIGINAL PARSE TREE:\n");
|
---|
5760 | print_tree(stderr, root);
|
---|
5761 | # endif
|
---|
5762 | #endif
|
---|
5763 |
|
---|
5764 | #ifdef USE_NAMED_GROUP
|
---|
5765 | /* mixed use named group and no-named group */
|
---|
5766 | if (scan_env.num_named > 0 &&
|
---|
5767 | IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
5768 | !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
5769 | if (scan_env.num_named != scan_env.num_mem)
|
---|
5770 | r = disable_noname_group_capture(&root, reg, &scan_env);
|
---|
5771 | else
|
---|
5772 | r = numbered_ref_check(root);
|
---|
5773 |
|
---|
5774 | if (r != 0) goto err;
|
---|
5775 | }
|
---|
5776 | #endif
|
---|
5777 |
|
---|
5778 | #ifdef USE_SUBEXP_CALL
|
---|
5779 | if (scan_env.num_call > 0) {
|
---|
5780 | r = unset_addr_list_init(&uslist, scan_env.num_call);
|
---|
5781 | if (r != 0) goto err;
|
---|
5782 | scan_env.unset_addr_list = &uslist;
|
---|
5783 | r = setup_subexp_call(root, &scan_env);
|
---|
5784 | if (r != 0) goto err_unset;
|
---|
5785 | r = subexp_recursive_check_trav(root, &scan_env);
|
---|
5786 | if (r < 0) goto err_unset;
|
---|
5787 | r = subexp_inf_recursive_check_trav(root, &scan_env);
|
---|
5788 | if (r != 0) goto err_unset;
|
---|
5789 |
|
---|
5790 | reg->num_call = scan_env.num_call;
|
---|
5791 | }
|
---|
5792 | else
|
---|
5793 | reg->num_call = 0;
|
---|
5794 | #endif
|
---|
5795 |
|
---|
5796 | r = setup_tree(root, reg, 0, &scan_env);
|
---|
5797 | if (r != 0) goto err_unset;
|
---|
5798 |
|
---|
5799 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
5800 | print_tree(stderr, root);
|
---|
5801 | #endif
|
---|
5802 |
|
---|
5803 | reg->capture_history = scan_env.capture_history;
|
---|
5804 | reg->bt_mem_start = scan_env.bt_mem_start;
|
---|
5805 | reg->bt_mem_start |= reg->capture_history;
|
---|
5806 | if (IS_FIND_CONDITION(reg->options))
|
---|
5807 | BIT_STATUS_ON_ALL(reg->bt_mem_end);
|
---|
5808 | else {
|
---|
5809 | reg->bt_mem_end = scan_env.bt_mem_end;
|
---|
5810 | reg->bt_mem_end |= reg->capture_history;
|
---|
5811 | }
|
---|
5812 |
|
---|
5813 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
5814 | if (scan_env.backrefed_mem == 0
|
---|
5815 | # ifdef USE_SUBEXP_CALL
|
---|
5816 | || scan_env.num_call == 0
|
---|
5817 | # endif
|
---|
5818 | ) {
|
---|
5819 | setup_comb_exp_check(root, 0, &scan_env);
|
---|
5820 | # ifdef USE_SUBEXP_CALL
|
---|
5821 | if (scan_env.has_recursion != 0) {
|
---|
5822 | scan_env.num_comb_exp_check = 0;
|
---|
5823 | }
|
---|
5824 | else
|
---|
5825 | # endif
|
---|
5826 | if (scan_env.comb_exp_max_regnum > 0) {
|
---|
5827 | int i;
|
---|
5828 | for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
|
---|
5829 | if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
|
---|
5830 | scan_env.num_comb_exp_check = 0;
|
---|
5831 | break;
|
---|
5832 | }
|
---|
5833 | }
|
---|
5834 | }
|
---|
5835 | }
|
---|
5836 |
|
---|
5837 | reg->num_comb_exp_check = scan_env.num_comb_exp_check;
|
---|
5838 | #endif
|
---|
5839 |
|
---|
5840 | clear_optimize_info(reg);
|
---|
5841 | #ifndef ONIG_DONT_OPTIMIZE
|
---|
5842 | r = set_optimize_info_from_tree(root, reg, &scan_env);
|
---|
5843 | if (r != 0) goto err_unset;
|
---|
5844 | #endif
|
---|
5845 |
|
---|
5846 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
|
---|
5847 | xfree(scan_env.mem_nodes_dynamic);
|
---|
5848 | scan_env.mem_nodes_dynamic = (Node** )NULL;
|
---|
5849 | }
|
---|
5850 |
|
---|
5851 | r = compile_tree(root, reg);
|
---|
5852 | if (r == 0) {
|
---|
5853 | r = add_opcode(reg, OP_END);
|
---|
5854 | #ifdef USE_SUBEXP_CALL
|
---|
5855 | if (scan_env.num_call > 0) {
|
---|
5856 | r = unset_addr_list_fix(&uslist, reg);
|
---|
5857 | unset_addr_list_end(&uslist);
|
---|
5858 | if (r) goto err;
|
---|
5859 | }
|
---|
5860 | #endif
|
---|
5861 |
|
---|
5862 | if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
|
---|
5863 | reg->stack_pop_level = STACK_POP_LEVEL_ALL;
|
---|
5864 | else {
|
---|
5865 | if (reg->bt_mem_start != 0)
|
---|
5866 | reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
|
---|
5867 | else
|
---|
5868 | reg->stack_pop_level = STACK_POP_LEVEL_FREE;
|
---|
5869 | }
|
---|
5870 | }
|
---|
5871 | #ifdef USE_SUBEXP_CALL
|
---|
5872 | else if (scan_env.num_call > 0) {
|
---|
5873 | unset_addr_list_end(&uslist);
|
---|
5874 | }
|
---|
5875 | #endif
|
---|
5876 | onig_node_free(root);
|
---|
5877 |
|
---|
5878 | #ifdef ONIG_DEBUG_COMPILE
|
---|
5879 | # ifdef USE_NAMED_GROUP
|
---|
5880 | onig_print_names(stderr, reg);
|
---|
5881 | # endif
|
---|
5882 | print_compiled_byte_code_list(stderr, reg);
|
---|
5883 | #endif
|
---|
5884 |
|
---|
5885 | end:
|
---|
5886 | return r;
|
---|
5887 |
|
---|
5888 | err_unset:
|
---|
5889 | #ifdef USE_SUBEXP_CALL
|
---|
5890 | if (scan_env.num_call > 0) {
|
---|
5891 | unset_addr_list_end(&uslist);
|
---|
5892 | }
|
---|
5893 | #endif
|
---|
5894 | err:
|
---|
5895 | if (IS_NOT_NULL(scan_env.error)) {
|
---|
5896 | if (IS_NOT_NULL(einfo)) {
|
---|
5897 | einfo->enc = scan_env.enc;
|
---|
5898 | einfo->par = scan_env.error;
|
---|
5899 | einfo->par_end = scan_env.error_end;
|
---|
5900 | }
|
---|
5901 | }
|
---|
5902 |
|
---|
5903 | onig_node_free(root);
|
---|
5904 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
|
---|
5905 | xfree(scan_env.mem_nodes_dynamic);
|
---|
5906 | return r;
|
---|
5907 | }
|
---|
5908 |
|
---|
5909 | static int onig_inited = 0;
|
---|
5910 |
|
---|
5911 | extern int
|
---|
5912 | onig_reg_init(regex_t* reg, OnigOptionType option,
|
---|
5913 | OnigCaseFoldType case_fold_flag,
|
---|
5914 | OnigEncoding enc, const OnigSyntaxType* syntax)
|
---|
5915 | {
|
---|
5916 | if (! onig_inited)
|
---|
5917 | onig_init();
|
---|
5918 |
|
---|
5919 | if (IS_NULL(reg))
|
---|
5920 | return ONIGERR_INVALID_ARGUMENT;
|
---|
5921 |
|
---|
5922 | if (ONIGENC_IS_UNDEF(enc))
|
---|
5923 | return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
|
---|
5924 |
|
---|
5925 | if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
|
---|
5926 | == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
5927 | return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
|
---|
5928 | }
|
---|
5929 |
|
---|
5930 | if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
|
---|
5931 | option |= syntax->options;
|
---|
5932 | option &= ~ONIG_OPTION_SINGLELINE;
|
---|
5933 | }
|
---|
5934 | else
|
---|
5935 | option |= syntax->options;
|
---|
5936 |
|
---|
5937 | (reg)->enc = enc;
|
---|
5938 | (reg)->options = option;
|
---|
5939 | (reg)->syntax = syntax;
|
---|
5940 | (reg)->optimize = 0;
|
---|
5941 | (reg)->exact = (UChar* )NULL;
|
---|
5942 | (reg)->int_map = (int* )NULL;
|
---|
5943 | (reg)->int_map_backward = (int* )NULL;
|
---|
5944 | (reg)->chain = (regex_t* )NULL;
|
---|
5945 |
|
---|
5946 | (reg)->p = (UChar* )NULL;
|
---|
5947 | (reg)->alloc = 0;
|
---|
5948 | (reg)->used = 0;
|
---|
5949 | (reg)->name_table = (void* )NULL;
|
---|
5950 |
|
---|
5951 | (reg)->case_fold_flag = case_fold_flag;
|
---|
5952 | return 0;
|
---|
5953 | }
|
---|
5954 |
|
---|
5955 | extern int
|
---|
5956 | onig_new_without_alloc(regex_t* reg, const UChar* pattern,
|
---|
5957 | const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
|
---|
5958 | const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
|
---|
5959 | {
|
---|
5960 | int r;
|
---|
5961 |
|
---|
5962 | r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
5963 | if (r) return r;
|
---|
5964 |
|
---|
5965 | r = onig_compile(reg, pattern, pattern_end, einfo);
|
---|
5966 | return r;
|
---|
5967 | }
|
---|
5968 |
|
---|
5969 | extern int
|
---|
5970 | onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
---|
5971 | OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
|
---|
5972 | OnigErrorInfo* einfo)
|
---|
5973 | {
|
---|
5974 | int r;
|
---|
5975 |
|
---|
5976 | *reg = (regex_t* )xmalloc(sizeof(regex_t));
|
---|
5977 | if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
---|
5978 |
|
---|
5979 | r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
5980 | if (r) goto err;
|
---|
5981 |
|
---|
5982 | r = onig_compile(*reg, pattern, pattern_end, einfo);
|
---|
5983 | if (r) {
|
---|
5984 | err:
|
---|
5985 | onig_free(*reg);
|
---|
5986 | *reg = NULL;
|
---|
5987 | }
|
---|
5988 | return r;
|
---|
5989 | }
|
---|
5990 |
|
---|
5991 | extern int
|
---|
5992 | onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
|
---|
5993 | {
|
---|
5994 | return onig_init();
|
---|
5995 | }
|
---|
5996 |
|
---|
5997 | extern int
|
---|
5998 | onig_init(void)
|
---|
5999 | {
|
---|
6000 | if (onig_inited != 0)
|
---|
6001 | return 0;
|
---|
6002 |
|
---|
6003 | onig_inited = 1;
|
---|
6004 |
|
---|
6005 | #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
|
---|
6006 | _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
|
---|
6007 | #endif
|
---|
6008 |
|
---|
6009 | onigenc_init();
|
---|
6010 | /* onigenc_set_default_caseconv_table((UChar* )0); */
|
---|
6011 |
|
---|
6012 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
6013 | onig_statistics_init();
|
---|
6014 | #endif
|
---|
6015 |
|
---|
6016 | return 0;
|
---|
6017 | }
|
---|
6018 |
|
---|
6019 |
|
---|
6020 | static OnigEndCallListItemType* EndCallTop;
|
---|
6021 |
|
---|
6022 | extern void onig_add_end_call(void (*func)(void))
|
---|
6023 | {
|
---|
6024 | OnigEndCallListItemType* item;
|
---|
6025 |
|
---|
6026 | item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
|
---|
6027 | if (item == 0) return ;
|
---|
6028 |
|
---|
6029 | item->next = EndCallTop;
|
---|
6030 | item->func = func;
|
---|
6031 |
|
---|
6032 | EndCallTop = item;
|
---|
6033 | }
|
---|
6034 |
|
---|
6035 | static void
|
---|
6036 | exec_end_call_list(void)
|
---|
6037 | {
|
---|
6038 | OnigEndCallListItemType* prev;
|
---|
6039 | void (*func)(void);
|
---|
6040 |
|
---|
6041 | while (EndCallTop != 0) {
|
---|
6042 | func = EndCallTop->func;
|
---|
6043 | (*func)();
|
---|
6044 |
|
---|
6045 | prev = EndCallTop;
|
---|
6046 | EndCallTop = EndCallTop->next;
|
---|
6047 | xfree(prev);
|
---|
6048 | }
|
---|
6049 | }
|
---|
6050 |
|
---|
6051 | extern int
|
---|
6052 | onig_end(void)
|
---|
6053 | {
|
---|
6054 | exec_end_call_list();
|
---|
6055 |
|
---|
6056 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
6057 | onig_print_statistics(stderr);
|
---|
6058 | #endif
|
---|
6059 |
|
---|
6060 | #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
|
---|
6061 | _CrtDumpMemoryLeaks();
|
---|
6062 | #endif
|
---|
6063 |
|
---|
6064 | onig_inited = 0;
|
---|
6065 |
|
---|
6066 | return 0;
|
---|
6067 | }
|
---|
6068 |
|
---|
6069 | extern int
|
---|
6070 | onig_is_in_code_range(const UChar* p, OnigCodePoint code)
|
---|
6071 | {
|
---|
6072 | OnigCodePoint n, *data;
|
---|
6073 | OnigCodePoint low, high, x;
|
---|
6074 |
|
---|
6075 | GET_CODE_POINT(n, p);
|
---|
6076 | data = (OnigCodePoint* )p;
|
---|
6077 | data++;
|
---|
6078 |
|
---|
6079 | for (low = 0, high = n; low < high; ) {
|
---|
6080 | x = (low + high) >> 1;
|
---|
6081 | if (code > data[x * 2 + 1])
|
---|
6082 | low = x + 1;
|
---|
6083 | else
|
---|
6084 | high = x;
|
---|
6085 | }
|
---|
6086 |
|
---|
6087 | return ((low < n && code >= data[low * 2]) ? 1 : 0);
|
---|
6088 | }
|
---|
6089 |
|
---|
6090 | extern int
|
---|
6091 | onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
|
---|
6092 | {
|
---|
6093 | int found;
|
---|
6094 |
|
---|
6095 | if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
|
---|
6096 | if (IS_NULL(cc->mbuf)) {
|
---|
6097 | found = 0;
|
---|
6098 | }
|
---|
6099 | else {
|
---|
6100 | found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
|
---|
6101 | }
|
---|
6102 | }
|
---|
6103 | else {
|
---|
6104 | found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
|
---|
6105 | }
|
---|
6106 |
|
---|
6107 | if (IS_NCCLASS_NOT(cc))
|
---|
6108 | return !found;
|
---|
6109 | else
|
---|
6110 | return found;
|
---|
6111 | }
|
---|
6112 |
|
---|
6113 | extern int
|
---|
6114 | onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
|
---|
6115 | {
|
---|
6116 | int len;
|
---|
6117 |
|
---|
6118 | if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
---|
6119 | len = 2;
|
---|
6120 | }
|
---|
6121 | else {
|
---|
6122 | len = ONIGENC_CODE_TO_MBCLEN(enc, code);
|
---|
6123 | }
|
---|
6124 | return onig_is_code_in_cc_len(len, code, cc);
|
---|
6125 | }
|
---|
6126 |
|
---|
6127 |
|
---|
6128 | #ifdef ONIG_DEBUG
|
---|
6129 |
|
---|
6130 | /* arguments type */
|
---|
6131 | # define ARG_SPECIAL -1
|
---|
6132 | # define ARG_NON 0
|
---|
6133 | # define ARG_RELADDR 1
|
---|
6134 | # define ARG_ABSADDR 2
|
---|
6135 | # define ARG_LENGTH 3
|
---|
6136 | # define ARG_MEMNUM 4
|
---|
6137 | # define ARG_OPTION 5
|
---|
6138 | # define ARG_STATE_CHECK 6
|
---|
6139 |
|
---|
6140 | OnigOpInfoType OnigOpInfo[] = {
|
---|
6141 | { OP_FINISH, "finish", ARG_NON },
|
---|
6142 | { OP_END, "end", ARG_NON },
|
---|
6143 | { OP_EXACT1, "exact1", ARG_SPECIAL },
|
---|
6144 | { OP_EXACT2, "exact2", ARG_SPECIAL },
|
---|
6145 | { OP_EXACT3, "exact3", ARG_SPECIAL },
|
---|
6146 | { OP_EXACT4, "exact4", ARG_SPECIAL },
|
---|
6147 | { OP_EXACT5, "exact5", ARG_SPECIAL },
|
---|
6148 | { OP_EXACTN, "exactn", ARG_SPECIAL },
|
---|
6149 | { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
|
---|
6150 | { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
|
---|
6151 | { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
|
---|
6152 | { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
|
---|
6153 | { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
|
---|
6154 | { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
|
---|
6155 | { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
|
---|
6156 | { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
|
---|
6157 | { OP_CCLASS, "cclass", ARG_SPECIAL },
|
---|
6158 | { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
|
---|
6159 | { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
|
---|
6160 | { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
|
---|
6161 | { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
|
---|
6162 | { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
|
---|
6163 | { OP_ANYCHAR, "anychar", ARG_NON },
|
---|
6164 | { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
|
---|
6165 | { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
|
---|
6166 | { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
|
---|
6167 | { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
|
---|
6168 | { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
|
---|
6169 | { OP_WORD, "word", ARG_NON },
|
---|
6170 | { OP_NOT_WORD, "not-word", ARG_NON },
|
---|
6171 | { OP_WORD_BOUND, "word-bound", ARG_NON },
|
---|
6172 | { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
|
---|
6173 | { OP_WORD_BEGIN, "word-begin", ARG_NON },
|
---|
6174 | { OP_WORD_END, "word-end", ARG_NON },
|
---|
6175 | { OP_ASCII_WORD, "ascii-word", ARG_NON },
|
---|
6176 | { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
|
---|
6177 | { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
|
---|
6178 | { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
|
---|
6179 | { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
|
---|
6180 | { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
|
---|
6181 | { OP_BEGIN_BUF, "begin-buf", ARG_NON },
|
---|
6182 | { OP_END_BUF, "end-buf", ARG_NON },
|
---|
6183 | { OP_BEGIN_LINE, "begin-line", ARG_NON },
|
---|
6184 | { OP_END_LINE, "end-line", ARG_NON },
|
---|
6185 | { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
|
---|
6186 | { OP_BEGIN_POSITION, "begin-position", ARG_NON },
|
---|
6187 | { OP_BACKREF1, "backref1", ARG_NON },
|
---|
6188 | { OP_BACKREF2, "backref2", ARG_NON },
|
---|
6189 | { OP_BACKREFN, "backrefn", ARG_MEMNUM },
|
---|
6190 | { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
|
---|
6191 | { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
|
---|
6192 | { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
|
---|
6193 | { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
|
---|
6194 | { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
|
---|
6195 | { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
|
---|
6196 | { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
|
---|
6197 | { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
|
---|
6198 | { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
|
---|
6199 | { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
|
---|
6200 | { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
|
---|
6201 | { OP_SET_OPTION, "set-option", ARG_OPTION },
|
---|
6202 | { OP_KEEP, "keep", ARG_NON },
|
---|
6203 | { OP_FAIL, "fail", ARG_NON },
|
---|
6204 | { OP_JUMP, "jump", ARG_RELADDR },
|
---|
6205 | { OP_PUSH, "push", ARG_RELADDR },
|
---|
6206 | { OP_POP, "pop", ARG_NON },
|
---|
6207 | { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
|
---|
6208 | { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
|
---|
6209 | { OP_REPEAT, "repeat", ARG_SPECIAL },
|
---|
6210 | { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
|
---|
6211 | { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
|
---|
6212 | { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
|
---|
6213 | { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
|
---|
6214 | { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
|
---|
6215 | { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
|
---|
6216 | { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
|
---|
6217 | { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
|
---|
6218 | { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
|
---|
6219 | { OP_PUSH_POS, "push-pos", ARG_NON },
|
---|
6220 | { OP_POP_POS, "pop-pos", ARG_NON },
|
---|
6221 | { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
|
---|
6222 | { OP_FAIL_POS, "fail-pos", ARG_NON },
|
---|
6223 | { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
|
---|
6224 | { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
|
---|
6225 | { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
|
---|
6226 | { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
|
---|
6227 | { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
|
---|
6228 | { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
|
---|
6229 | { OP_ABSENT, "absent", ARG_RELADDR },
|
---|
6230 | { OP_ABSENT_END, "absent-end", ARG_NON },
|
---|
6231 | { OP_CALL, "call", ARG_ABSADDR },
|
---|
6232 | { OP_RETURN, "return", ARG_NON },
|
---|
6233 | { OP_CONDITION, "condition", ARG_SPECIAL },
|
---|
6234 | { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
|
---|
6235 | { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
|
---|
6236 | { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
|
---|
6237 | { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
|
---|
6238 | { OP_STATE_CHECK_ANYCHAR_ML_STAR,
|
---|
6239 | "state-check-anychar-ml*", ARG_STATE_CHECK },
|
---|
6240 | { -1, "", ARG_NON }
|
---|
6241 | };
|
---|
6242 |
|
---|
6243 | static const char*
|
---|
6244 | op2name(int opcode)
|
---|
6245 | {
|
---|
6246 | int i;
|
---|
6247 |
|
---|
6248 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
6249 | if (opcode == OnigOpInfo[i].opcode)
|
---|
6250 | return OnigOpInfo[i].name;
|
---|
6251 | }
|
---|
6252 | return "";
|
---|
6253 | }
|
---|
6254 |
|
---|
6255 | static int
|
---|
6256 | op2arg_type(int opcode)
|
---|
6257 | {
|
---|
6258 | int i;
|
---|
6259 |
|
---|
6260 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
6261 | if (opcode == OnigOpInfo[i].opcode)
|
---|
6262 | return OnigOpInfo[i].arg_type;
|
---|
6263 | }
|
---|
6264 | return ARG_SPECIAL;
|
---|
6265 | }
|
---|
6266 |
|
---|
6267 | # ifdef ONIG_DEBUG_PARSE_TREE
|
---|
6268 | static void
|
---|
6269 | Indent(FILE* f, int indent)
|
---|
6270 | {
|
---|
6271 | int i;
|
---|
6272 | for (i = 0; i < indent; i++) putc(' ', f);
|
---|
6273 | }
|
---|
6274 | # endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
6275 |
|
---|
6276 | static void
|
---|
6277 | p_string(FILE* f, ptrdiff_t len, UChar* s)
|
---|
6278 | {
|
---|
6279 | fputs(":", f);
|
---|
6280 | while (len-- > 0) { fputc(*s++, f); }
|
---|
6281 | }
|
---|
6282 |
|
---|
6283 | static void
|
---|
6284 | p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
|
---|
6285 | {
|
---|
6286 | int x = len * mb_len;
|
---|
6287 |
|
---|
6288 | fprintf(f, ":%d:", len);
|
---|
6289 | while (x-- > 0) { fputc(*s++, f); }
|
---|
6290 | }
|
---|
6291 |
|
---|
6292 | extern void
|
---|
6293 | onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
|
---|
6294 | OnigEncoding enc)
|
---|
6295 | {
|
---|
6296 | int i, n, arg_type;
|
---|
6297 | RelAddrType addr;
|
---|
6298 | LengthType len;
|
---|
6299 | MemNumType mem;
|
---|
6300 | StateCheckNumType scn;
|
---|
6301 | OnigCodePoint code;
|
---|
6302 | UChar *q;
|
---|
6303 |
|
---|
6304 | fprintf(f, "[%s", op2name(*bp));
|
---|
6305 | arg_type = op2arg_type(*bp);
|
---|
6306 | if (arg_type != ARG_SPECIAL) {
|
---|
6307 | bp++;
|
---|
6308 | switch (arg_type) {
|
---|
6309 | case ARG_NON:
|
---|
6310 | break;
|
---|
6311 | case ARG_RELADDR:
|
---|
6312 | GET_RELADDR_INC(addr, bp);
|
---|
6313 | fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
|
---|
6314 | break;
|
---|
6315 | case ARG_ABSADDR:
|
---|
6316 | GET_ABSADDR_INC(addr, bp);
|
---|
6317 | fprintf(f, ":(%d)", addr);
|
---|
6318 | break;
|
---|
6319 | case ARG_LENGTH:
|
---|
6320 | GET_LENGTH_INC(len, bp);
|
---|
6321 | fprintf(f, ":%d", len);
|
---|
6322 | break;
|
---|
6323 | case ARG_MEMNUM:
|
---|
6324 | mem = *((MemNumType* )bp);
|
---|
6325 | bp += SIZE_MEMNUM;
|
---|
6326 | fprintf(f, ":%d", mem);
|
---|
6327 | break;
|
---|
6328 | case ARG_OPTION:
|
---|
6329 | {
|
---|
6330 | OnigOptionType option = *((OnigOptionType* )bp);
|
---|
6331 | bp += SIZE_OPTION;
|
---|
6332 | fprintf(f, ":%d", option);
|
---|
6333 | }
|
---|
6334 | break;
|
---|
6335 |
|
---|
6336 | case ARG_STATE_CHECK:
|
---|
6337 | scn = *((StateCheckNumType* )bp);
|
---|
6338 | bp += SIZE_STATE_CHECK_NUM;
|
---|
6339 | fprintf(f, ":%d", scn);
|
---|
6340 | break;
|
---|
6341 | }
|
---|
6342 | }
|
---|
6343 | else {
|
---|
6344 | switch (*bp++) {
|
---|
6345 | case OP_EXACT1:
|
---|
6346 | case OP_ANYCHAR_STAR_PEEK_NEXT:
|
---|
6347 | case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
|
---|
6348 | p_string(f, 1, bp++); break;
|
---|
6349 | case OP_EXACT2:
|
---|
6350 | p_string(f, 2, bp); bp += 2; break;
|
---|
6351 | case OP_EXACT3:
|
---|
6352 | p_string(f, 3, bp); bp += 3; break;
|
---|
6353 | case OP_EXACT4:
|
---|
6354 | p_string(f, 4, bp); bp += 4; break;
|
---|
6355 | case OP_EXACT5:
|
---|
6356 | p_string(f, 5, bp); bp += 5; break;
|
---|
6357 | case OP_EXACTN:
|
---|
6358 | GET_LENGTH_INC(len, bp);
|
---|
6359 | p_len_string(f, len, 1, bp);
|
---|
6360 | bp += len;
|
---|
6361 | break;
|
---|
6362 |
|
---|
6363 | case OP_EXACTMB2N1:
|
---|
6364 | p_string(f, 2, bp); bp += 2; break;
|
---|
6365 | case OP_EXACTMB2N2:
|
---|
6366 | p_string(f, 4, bp); bp += 4; break;
|
---|
6367 | case OP_EXACTMB2N3:
|
---|
6368 | p_string(f, 6, bp); bp += 6; break;
|
---|
6369 | case OP_EXACTMB2N:
|
---|
6370 | GET_LENGTH_INC(len, bp);
|
---|
6371 | p_len_string(f, len, 2, bp);
|
---|
6372 | bp += len * 2;
|
---|
6373 | break;
|
---|
6374 | case OP_EXACTMB3N:
|
---|
6375 | GET_LENGTH_INC(len, bp);
|
---|
6376 | p_len_string(f, len, 3, bp);
|
---|
6377 | bp += len * 3;
|
---|
6378 | break;
|
---|
6379 | case OP_EXACTMBN:
|
---|
6380 | {
|
---|
6381 | int mb_len;
|
---|
6382 |
|
---|
6383 | GET_LENGTH_INC(mb_len, bp);
|
---|
6384 | GET_LENGTH_INC(len, bp);
|
---|
6385 | fprintf(f, ":%d:%d:", mb_len, len);
|
---|
6386 | n = len * mb_len;
|
---|
6387 | while (n-- > 0) { fputc(*bp++, f); }
|
---|
6388 | }
|
---|
6389 | break;
|
---|
6390 |
|
---|
6391 | case OP_EXACT1_IC:
|
---|
6392 | len = enclen(enc, bp, bpend);
|
---|
6393 | p_string(f, len, bp);
|
---|
6394 | bp += len;
|
---|
6395 | break;
|
---|
6396 | case OP_EXACTN_IC:
|
---|
6397 | GET_LENGTH_INC(len, bp);
|
---|
6398 | p_len_string(f, len, 1, bp);
|
---|
6399 | bp += len;
|
---|
6400 | break;
|
---|
6401 |
|
---|
6402 | case OP_CCLASS:
|
---|
6403 | n = bitset_on_num((BitSetRef )bp);
|
---|
6404 | bp += SIZE_BITSET;
|
---|
6405 | fprintf(f, ":%d", n);
|
---|
6406 | break;
|
---|
6407 |
|
---|
6408 | case OP_CCLASS_NOT:
|
---|
6409 | n = bitset_on_num((BitSetRef )bp);
|
---|
6410 | bp += SIZE_BITSET;
|
---|
6411 | fprintf(f, ":%d", n);
|
---|
6412 | break;
|
---|
6413 |
|
---|
6414 | case OP_CCLASS_MB:
|
---|
6415 | case OP_CCLASS_MB_NOT:
|
---|
6416 | GET_LENGTH_INC(len, bp);
|
---|
6417 | q = bp;
|
---|
6418 | # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
6419 | ALIGNMENT_RIGHT(q);
|
---|
6420 | # endif
|
---|
6421 | GET_CODE_POINT(code, q);
|
---|
6422 | bp += len;
|
---|
6423 | fprintf(f, ":%d:%d", (int )code, len);
|
---|
6424 | break;
|
---|
6425 |
|
---|
6426 | case OP_CCLASS_MIX:
|
---|
6427 | case OP_CCLASS_MIX_NOT:
|
---|
6428 | n = bitset_on_num((BitSetRef )bp);
|
---|
6429 | bp += SIZE_BITSET;
|
---|
6430 | GET_LENGTH_INC(len, bp);
|
---|
6431 | q = bp;
|
---|
6432 | # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
6433 | ALIGNMENT_RIGHT(q);
|
---|
6434 | # endif
|
---|
6435 | GET_CODE_POINT(code, q);
|
---|
6436 | bp += len;
|
---|
6437 | fprintf(f, ":%d:%d:%d", n, (int )code, len);
|
---|
6438 | break;
|
---|
6439 |
|
---|
6440 | case OP_BACKREFN_IC:
|
---|
6441 | mem = *((MemNumType* )bp);
|
---|
6442 | bp += SIZE_MEMNUM;
|
---|
6443 | fprintf(f, ":%d", mem);
|
---|
6444 | break;
|
---|
6445 |
|
---|
6446 | case OP_BACKREF_MULTI_IC:
|
---|
6447 | case OP_BACKREF_MULTI:
|
---|
6448 | fputs(" ", f);
|
---|
6449 | GET_LENGTH_INC(len, bp);
|
---|
6450 | for (i = 0; i < len; i++) {
|
---|
6451 | GET_MEMNUM_INC(mem, bp);
|
---|
6452 | if (i > 0) fputs(", ", f);
|
---|
6453 | fprintf(f, "%d", mem);
|
---|
6454 | }
|
---|
6455 | break;
|
---|
6456 |
|
---|
6457 | case OP_BACKREF_WITH_LEVEL:
|
---|
6458 | {
|
---|
6459 | OnigOptionType option;
|
---|
6460 | LengthType level;
|
---|
6461 |
|
---|
6462 | GET_OPTION_INC(option, bp);
|
---|
6463 | fprintf(f, ":%d", option);
|
---|
6464 | GET_LENGTH_INC(level, bp);
|
---|
6465 | fprintf(f, ":%d", level);
|
---|
6466 |
|
---|
6467 | fputs(" ", f);
|
---|
6468 | GET_LENGTH_INC(len, bp);
|
---|
6469 | for (i = 0; i < len; i++) {
|
---|
6470 | GET_MEMNUM_INC(mem, bp);
|
---|
6471 | if (i > 0) fputs(", ", f);
|
---|
6472 | fprintf(f, "%d", mem);
|
---|
6473 | }
|
---|
6474 | }
|
---|
6475 | break;
|
---|
6476 |
|
---|
6477 | case OP_REPEAT:
|
---|
6478 | case OP_REPEAT_NG:
|
---|
6479 | {
|
---|
6480 | mem = *((MemNumType* )bp);
|
---|
6481 | bp += SIZE_MEMNUM;
|
---|
6482 | addr = *((RelAddrType* )bp);
|
---|
6483 | bp += SIZE_RELADDR;
|
---|
6484 | fprintf(f, ":%d:%d", mem, addr);
|
---|
6485 | }
|
---|
6486 | break;
|
---|
6487 |
|
---|
6488 | case OP_PUSH_OR_JUMP_EXACT1:
|
---|
6489 | case OP_PUSH_IF_PEEK_NEXT:
|
---|
6490 | addr = *((RelAddrType* )bp);
|
---|
6491 | bp += SIZE_RELADDR;
|
---|
6492 | fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
|
---|
6493 | p_string(f, 1, bp);
|
---|
6494 | bp += 1;
|
---|
6495 | break;
|
---|
6496 |
|
---|
6497 | case OP_LOOK_BEHIND:
|
---|
6498 | GET_LENGTH_INC(len, bp);
|
---|
6499 | fprintf(f, ":%d", len);
|
---|
6500 | break;
|
---|
6501 |
|
---|
6502 | case OP_PUSH_LOOK_BEHIND_NOT:
|
---|
6503 | GET_RELADDR_INC(addr, bp);
|
---|
6504 | GET_LENGTH_INC(len, bp);
|
---|
6505 | fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
|
---|
6506 | break;
|
---|
6507 |
|
---|
6508 | case OP_STATE_CHECK_PUSH:
|
---|
6509 | case OP_STATE_CHECK_PUSH_OR_JUMP:
|
---|
6510 | scn = *((StateCheckNumType* )bp);
|
---|
6511 | bp += SIZE_STATE_CHECK_NUM;
|
---|
6512 | addr = *((RelAddrType* )bp);
|
---|
6513 | bp += SIZE_RELADDR;
|
---|
6514 | fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
|
---|
6515 | break;
|
---|
6516 |
|
---|
6517 | case OP_CONDITION:
|
---|
6518 | GET_MEMNUM_INC(mem, bp);
|
---|
6519 | GET_RELADDR_INC(addr, bp);
|
---|
6520 | fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
|
---|
6521 | break;
|
---|
6522 |
|
---|
6523 | default:
|
---|
6524 | fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
|
---|
6525 | bp[-1]);
|
---|
6526 | }
|
---|
6527 | }
|
---|
6528 | fputs("]", f);
|
---|
6529 | if (nextp) *nextp = bp;
|
---|
6530 | }
|
---|
6531 |
|
---|
6532 | # ifdef ONIG_DEBUG_COMPILE
|
---|
6533 | static void
|
---|
6534 | print_compiled_byte_code_list(FILE* f, regex_t* reg)
|
---|
6535 | {
|
---|
6536 | int ncode;
|
---|
6537 | UChar* bp = reg->p;
|
---|
6538 | UChar* end = reg->p + reg->used;
|
---|
6539 |
|
---|
6540 | fprintf(f, "code length: %d", reg->used);
|
---|
6541 |
|
---|
6542 | ncode = -1;
|
---|
6543 | while (bp < end) {
|
---|
6544 | ncode++;
|
---|
6545 | if (ncode % 5 == 0)
|
---|
6546 | fprintf(f, "\n%ld:", bp - reg->p);
|
---|
6547 | else
|
---|
6548 | fprintf(f, " %ld:", bp - reg->p);
|
---|
6549 | onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
|
---|
6550 | }
|
---|
6551 |
|
---|
6552 | fprintf(f, "\n");
|
---|
6553 | }
|
---|
6554 | # endif /* ONIG_DEBUG_COMPILE */
|
---|
6555 |
|
---|
6556 | # ifdef ONIG_DEBUG_PARSE_TREE
|
---|
6557 | static void
|
---|
6558 | print_indent_tree(FILE* f, Node* node, int indent)
|
---|
6559 | {
|
---|
6560 | int i, type, container_p = 0;
|
---|
6561 | int add = 3;
|
---|
6562 | UChar* p;
|
---|
6563 |
|
---|
6564 | Indent(f, indent);
|
---|
6565 | if (IS_NULL(node)) {
|
---|
6566 | fprintf(f, "ERROR: null node!!!\n");
|
---|
6567 | exit (0);
|
---|
6568 | }
|
---|
6569 |
|
---|
6570 | type = NTYPE(node);
|
---|
6571 | switch (type) {
|
---|
6572 | case NT_LIST:
|
---|
6573 | case NT_ALT:
|
---|
6574 | if (NTYPE(node) == NT_LIST)
|
---|
6575 | fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
|
---|
6576 | else
|
---|
6577 | fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
|
---|
6578 |
|
---|
6579 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
6580 | while (IS_NOT_NULL(node = NCDR(node))) {
|
---|
6581 | if (NTYPE(node) != type) {
|
---|
6582 | fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
|
---|
6583 | exit(0);
|
---|
6584 | }
|
---|
6585 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
6586 | }
|
---|
6587 | break;
|
---|
6588 |
|
---|
6589 | case NT_STR:
|
---|
6590 | fprintf(f, "<string%s:%"PRIxPTR">",
|
---|
6591 | (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
|
---|
6592 | for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
|
---|
6593 | if (*p >= 0x20 && *p < 0x7f)
|
---|
6594 | fputc(*p, f);
|
---|
6595 | else {
|
---|
6596 | fprintf(f, " 0x%02x", *p);
|
---|
6597 | }
|
---|
6598 | }
|
---|
6599 | break;
|
---|
6600 |
|
---|
6601 | case NT_CCLASS:
|
---|
6602 | fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
|
---|
6603 | if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
|
---|
6604 | if (NCCLASS(node)->mbuf) {
|
---|
6605 | BBuf* bbuf = NCCLASS(node)->mbuf;
|
---|
6606 | OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
|
---|
6607 | OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
|
---|
6608 | fprintf(f, "%d", *data++);
|
---|
6609 | for (; data < end; data+=2) {
|
---|
6610 | fprintf(f, ",");
|
---|
6611 | fprintf(f, "%04x-%04x", data[0], data[1]);
|
---|
6612 | }
|
---|
6613 | }
|
---|
6614 | break;
|
---|
6615 |
|
---|
6616 | case NT_CTYPE:
|
---|
6617 | fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
|
---|
6618 | switch (NCTYPE(node)->ctype) {
|
---|
6619 | case ONIGENC_CTYPE_WORD:
|
---|
6620 | if (NCTYPE(node)->not != 0)
|
---|
6621 | fputs("not word", f);
|
---|
6622 | else
|
---|
6623 | fputs("word", f);
|
---|
6624 | break;
|
---|
6625 |
|
---|
6626 | default:
|
---|
6627 | fprintf(f, "ERROR: undefined ctype.\n");
|
---|
6628 | exit(0);
|
---|
6629 | }
|
---|
6630 | break;
|
---|
6631 |
|
---|
6632 | case NT_CANY:
|
---|
6633 | fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
|
---|
6634 | break;
|
---|
6635 |
|
---|
6636 | case NT_ANCHOR:
|
---|
6637 | fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
|
---|
6638 | switch (NANCHOR(node)->type) {
|
---|
6639 | case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
|
---|
6640 | case ANCHOR_END_BUF: fputs("end buf", f); break;
|
---|
6641 | case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
|
---|
6642 | case ANCHOR_END_LINE: fputs("end line", f); break;
|
---|
6643 | case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
|
---|
6644 | case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
|
---|
6645 |
|
---|
6646 | case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
|
---|
6647 | case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
|
---|
6648 | # ifdef USE_WORD_BEGIN_END
|
---|
6649 | case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
|
---|
6650 | case ANCHOR_WORD_END: fputs("word end", f); break;
|
---|
6651 | # endif
|
---|
6652 | case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
|
---|
6653 | case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
|
---|
6654 | case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
|
---|
6655 | case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
|
---|
6656 | case ANCHOR_KEEP: fputs("keep",f); break;
|
---|
6657 |
|
---|
6658 | default:
|
---|
6659 | fprintf(f, "ERROR: undefined anchor type.\n");
|
---|
6660 | break;
|
---|
6661 | }
|
---|
6662 | break;
|
---|
6663 |
|
---|
6664 | case NT_BREF:
|
---|
6665 | {
|
---|
6666 | int* p;
|
---|
6667 | BRefNode* br = NBREF(node);
|
---|
6668 | p = BACKREFS_P(br);
|
---|
6669 | fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
|
---|
6670 | for (i = 0; i < br->back_num; i++) {
|
---|
6671 | if (i > 0) fputs(", ", f);
|
---|
6672 | fprintf(f, "%d", p[i]);
|
---|
6673 | }
|
---|
6674 | }
|
---|
6675 | break;
|
---|
6676 |
|
---|
6677 | # ifdef USE_SUBEXP_CALL
|
---|
6678 | case NT_CALL:
|
---|
6679 | {
|
---|
6680 | CallNode* cn = NCALL(node);
|
---|
6681 | fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
|
---|
6682 | p_string(f, cn->name_end - cn->name, cn->name);
|
---|
6683 | }
|
---|
6684 | break;
|
---|
6685 | # endif
|
---|
6686 |
|
---|
6687 | case NT_QTFR:
|
---|
6688 | fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
|
---|
6689 | NQTFR(node)->lower, NQTFR(node)->upper,
|
---|
6690 | (NQTFR(node)->greedy ? "" : "?"));
|
---|
6691 | print_indent_tree(f, NQTFR(node)->target, indent + add);
|
---|
6692 | break;
|
---|
6693 |
|
---|
6694 | case NT_ENCLOSE:
|
---|
6695 | fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
|
---|
6696 | switch (NENCLOSE(node)->type) {
|
---|
6697 | case ENCLOSE_OPTION:
|
---|
6698 | fprintf(f, "option:%d", NENCLOSE(node)->option);
|
---|
6699 | break;
|
---|
6700 | case ENCLOSE_MEMORY:
|
---|
6701 | fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
|
---|
6702 | break;
|
---|
6703 | case ENCLOSE_STOP_BACKTRACK:
|
---|
6704 | fprintf(f, "stop-bt");
|
---|
6705 | break;
|
---|
6706 | case ENCLOSE_CONDITION:
|
---|
6707 | fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
|
---|
6708 | break;
|
---|
6709 | case ENCLOSE_ABSENT:
|
---|
6710 | fprintf(f, "absent");
|
---|
6711 | break;
|
---|
6712 |
|
---|
6713 | default:
|
---|
6714 | break;
|
---|
6715 | }
|
---|
6716 | fprintf(f, "\n");
|
---|
6717 | print_indent_tree(f, NENCLOSE(node)->target, indent + add);
|
---|
6718 | break;
|
---|
6719 |
|
---|
6720 | default:
|
---|
6721 | fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
|
---|
6722 | break;
|
---|
6723 | }
|
---|
6724 |
|
---|
6725 | if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
|
---|
6726 | type != NT_ENCLOSE)
|
---|
6727 | fprintf(f, "\n");
|
---|
6728 |
|
---|
6729 | if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
|
---|
6730 |
|
---|
6731 | fflush(f);
|
---|
6732 | }
|
---|
6733 |
|
---|
6734 | static void
|
---|
6735 | print_tree(FILE* f, Node* node)
|
---|
6736 | {
|
---|
6737 | print_indent_tree(f, node, 0);
|
---|
6738 | }
|
---|
6739 | # endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
6740 | #endif /* ONIG_DEBUG */
|
---|