1 | /**********************************************************************
|
---|
2 | regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
|
---|
3 | **********************************************************************/
|
---|
4 | /*-
|
---|
5 | * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
---|
6 | * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
---|
7 | * All rights reserved.
|
---|
8 | *
|
---|
9 | * Redistribution and use in source and binary forms, with or without
|
---|
10 | * modification, are permitted provided that the following conditions
|
---|
11 | * are met:
|
---|
12 | * 1. Redistributions of source code must retain the above copyright
|
---|
13 | * notice, this list of conditions and the following disclaimer.
|
---|
14 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
15 | * notice, this list of conditions and the following disclaimer in the
|
---|
16 | * documentation and/or other materials provided with the distribution.
|
---|
17 | *
|
---|
18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
---|
19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
---|
21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
---|
22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
---|
23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
---|
24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
---|
25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
---|
26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
---|
27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
28 | * SUCH DAMAGE.
|
---|
29 | */
|
---|
30 |
|
---|
31 | #include "regparse.h"
|
---|
32 |
|
---|
33 | #if defined(USE_MULTI_THREAD_SYSTEM) \
|
---|
34 | && defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
|
---|
35 | #ifdef _WIN32
|
---|
36 | CRITICAL_SECTION gOnigMutex;
|
---|
37 | #else
|
---|
38 | pthread_mutex_t gOnigMutex;
|
---|
39 | #endif
|
---|
40 | #endif
|
---|
41 |
|
---|
42 | OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
|
---|
43 |
|
---|
44 | extern OnigCaseFoldType
|
---|
45 | onig_get_default_case_fold_flag(void)
|
---|
46 | {
|
---|
47 | return OnigDefaultCaseFoldFlag;
|
---|
48 | }
|
---|
49 |
|
---|
50 | extern int
|
---|
51 | onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
|
---|
52 | {
|
---|
53 | OnigDefaultCaseFoldFlag = case_fold_flag;
|
---|
54 | return 0;
|
---|
55 | }
|
---|
56 |
|
---|
57 |
|
---|
58 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
59 | static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
---|
60 | #endif
|
---|
61 |
|
---|
62 | #if 0
|
---|
63 | static UChar*
|
---|
64 | str_dup(UChar* s, UChar* end)
|
---|
65 | {
|
---|
66 | ptrdiff_t len = end - s;
|
---|
67 |
|
---|
68 | if (len > 0) {
|
---|
69 | UChar* r = (UChar* )xmalloc(len + 1);
|
---|
70 | CHECK_NULL_RETURN(r);
|
---|
71 | xmemcpy(r, s, len);
|
---|
72 | r[len] = (UChar )0;
|
---|
73 | return r;
|
---|
74 | }
|
---|
75 | else return NULL;
|
---|
76 | }
|
---|
77 | #endif
|
---|
78 |
|
---|
79 | static void
|
---|
80 | swap_node(Node* a, Node* b)
|
---|
81 | {
|
---|
82 | Node c;
|
---|
83 | c = *a; *a = *b; *b = c;
|
---|
84 |
|
---|
85 | if (NTYPE(a) == NT_STR) {
|
---|
86 | StrNode* sn = NSTR(a);
|
---|
87 | if (sn->capa == 0) {
|
---|
88 | size_t len = sn->end - sn->s;
|
---|
89 | sn->s = sn->buf;
|
---|
90 | sn->end = sn->s + len;
|
---|
91 | }
|
---|
92 | }
|
---|
93 |
|
---|
94 | if (NTYPE(b) == NT_STR) {
|
---|
95 | StrNode* sn = NSTR(b);
|
---|
96 | if (sn->capa == 0) {
|
---|
97 | size_t len = sn->end - sn->s;
|
---|
98 | sn->s = sn->buf;
|
---|
99 | sn->end = sn->s + len;
|
---|
100 | }
|
---|
101 | }
|
---|
102 | }
|
---|
103 |
|
---|
104 | static OnigDistance
|
---|
105 | distance_add(OnigDistance d1, OnigDistance d2)
|
---|
106 | {
|
---|
107 | if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
|
---|
108 | return ONIG_INFINITE_DISTANCE;
|
---|
109 | else {
|
---|
110 | if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
|
---|
111 | else return ONIG_INFINITE_DISTANCE;
|
---|
112 | }
|
---|
113 | }
|
---|
114 |
|
---|
115 | static OnigDistance
|
---|
116 | distance_multiply(OnigDistance d, int m)
|
---|
117 | {
|
---|
118 | if (m == 0) return 0;
|
---|
119 |
|
---|
120 | if (d < ONIG_INFINITE_DISTANCE / m)
|
---|
121 | return d * m;
|
---|
122 | else
|
---|
123 | return ONIG_INFINITE_DISTANCE;
|
---|
124 | }
|
---|
125 |
|
---|
126 | static int
|
---|
127 | bitset_is_empty(BitSetRef bs)
|
---|
128 | {
|
---|
129 | int i;
|
---|
130 | for (i = 0; i < BITSET_SIZE; i++) {
|
---|
131 | if (bs[i] != 0) return 0;
|
---|
132 | }
|
---|
133 | return 1;
|
---|
134 | }
|
---|
135 |
|
---|
136 | #ifdef ONIG_DEBUG
|
---|
137 | static int
|
---|
138 | bitset_on_num(BitSetRef bs)
|
---|
139 | {
|
---|
140 | int i, n;
|
---|
141 |
|
---|
142 | n = 0;
|
---|
143 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
144 | if (BITSET_AT(bs, i)) n++;
|
---|
145 | }
|
---|
146 | return n;
|
---|
147 | }
|
---|
148 | #endif
|
---|
149 |
|
---|
150 | extern int
|
---|
151 | onig_bbuf_init(BBuf* buf, OnigDistance size)
|
---|
152 | {
|
---|
153 | if (size <= 0) {
|
---|
154 | size = 0;
|
---|
155 | buf->p = NULL;
|
---|
156 | }
|
---|
157 | else {
|
---|
158 | buf->p = (UChar* )xmalloc(size);
|
---|
159 | if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
|
---|
160 | }
|
---|
161 |
|
---|
162 | buf->alloc = (unsigned int )size;
|
---|
163 | buf->used = 0;
|
---|
164 | return 0;
|
---|
165 | }
|
---|
166 |
|
---|
167 |
|
---|
168 | #ifdef USE_SUBEXP_CALL
|
---|
169 |
|
---|
170 | static int
|
---|
171 | unset_addr_list_init(UnsetAddrList* uslist, int size)
|
---|
172 | {
|
---|
173 | UnsetAddr* p;
|
---|
174 |
|
---|
175 | p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
|
---|
176 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
177 | uslist->num = 0;
|
---|
178 | uslist->alloc = size;
|
---|
179 | uslist->us = p;
|
---|
180 | return 0;
|
---|
181 | }
|
---|
182 |
|
---|
183 | static void
|
---|
184 | unset_addr_list_end(UnsetAddrList* uslist)
|
---|
185 | {
|
---|
186 | if (IS_NOT_NULL(uslist->us))
|
---|
187 | xfree(uslist->us);
|
---|
188 | }
|
---|
189 |
|
---|
190 | static int
|
---|
191 | unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
|
---|
192 | {
|
---|
193 | UnsetAddr* p;
|
---|
194 | int size;
|
---|
195 |
|
---|
196 | if (uslist->num >= uslist->alloc) {
|
---|
197 | size = uslist->alloc * 2;
|
---|
198 | p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
|
---|
199 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
200 | uslist->alloc = size;
|
---|
201 | uslist->us = p;
|
---|
202 | }
|
---|
203 |
|
---|
204 | uslist->us[uslist->num].offset = offset;
|
---|
205 | uslist->us[uslist->num].target = node;
|
---|
206 | uslist->num++;
|
---|
207 | return 0;
|
---|
208 | }
|
---|
209 | #endif /* USE_SUBEXP_CALL */
|
---|
210 |
|
---|
211 |
|
---|
212 | static int
|
---|
213 | add_opcode(regex_t* reg, int opcode)
|
---|
214 | {
|
---|
215 | BBUF_ADD1(reg, opcode);
|
---|
216 | return 0;
|
---|
217 | }
|
---|
218 |
|
---|
219 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
220 | static int
|
---|
221 | add_state_check_num(regex_t* reg, int num)
|
---|
222 | {
|
---|
223 | StateCheckNumType n = (StateCheckNumType )num;
|
---|
224 |
|
---|
225 | BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
|
---|
226 | return 0;
|
---|
227 | }
|
---|
228 | #endif
|
---|
229 |
|
---|
230 | static int
|
---|
231 | add_rel_addr(regex_t* reg, int addr)
|
---|
232 | {
|
---|
233 | RelAddrType ra = (RelAddrType )addr;
|
---|
234 |
|
---|
235 | BBUF_ADD(reg, &ra, SIZE_RELADDR);
|
---|
236 | return 0;
|
---|
237 | }
|
---|
238 |
|
---|
239 | static int
|
---|
240 | add_abs_addr(regex_t* reg, int addr)
|
---|
241 | {
|
---|
242 | AbsAddrType ra = (AbsAddrType )addr;
|
---|
243 |
|
---|
244 | BBUF_ADD(reg, &ra, SIZE_ABSADDR);
|
---|
245 | return 0;
|
---|
246 | }
|
---|
247 |
|
---|
248 | static int
|
---|
249 | add_length(regex_t* reg, OnigDistance len)
|
---|
250 | {
|
---|
251 | LengthType l = (LengthType )len;
|
---|
252 |
|
---|
253 | BBUF_ADD(reg, &l, SIZE_LENGTH);
|
---|
254 | return 0;
|
---|
255 | }
|
---|
256 |
|
---|
257 | static int
|
---|
258 | add_mem_num(regex_t* reg, int num)
|
---|
259 | {
|
---|
260 | MemNumType n = (MemNumType )num;
|
---|
261 |
|
---|
262 | BBUF_ADD(reg, &n, SIZE_MEMNUM);
|
---|
263 | return 0;
|
---|
264 | }
|
---|
265 |
|
---|
266 | static int
|
---|
267 | add_pointer(regex_t* reg, void* addr)
|
---|
268 | {
|
---|
269 | PointerType ptr = (PointerType )addr;
|
---|
270 |
|
---|
271 | BBUF_ADD(reg, &ptr, SIZE_POINTER);
|
---|
272 | return 0;
|
---|
273 | }
|
---|
274 |
|
---|
275 | static int
|
---|
276 | add_option(regex_t* reg, OnigOptionType option)
|
---|
277 | {
|
---|
278 | BBUF_ADD(reg, &option, SIZE_OPTION);
|
---|
279 | return 0;
|
---|
280 | }
|
---|
281 |
|
---|
282 | static int
|
---|
283 | add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
|
---|
284 | {
|
---|
285 | int r;
|
---|
286 |
|
---|
287 | r = add_opcode(reg, opcode);
|
---|
288 | if (r) return r;
|
---|
289 | r = add_rel_addr(reg, addr);
|
---|
290 | return r;
|
---|
291 | }
|
---|
292 |
|
---|
293 | static int
|
---|
294 | add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
|
---|
295 | {
|
---|
296 | BBUF_ADD(reg, bytes, len);
|
---|
297 | return 0;
|
---|
298 | }
|
---|
299 |
|
---|
300 | static int
|
---|
301 | add_bitset(regex_t* reg, BitSetRef bs)
|
---|
302 | {
|
---|
303 | BBUF_ADD(reg, bs, SIZE_BITSET);
|
---|
304 | return 0;
|
---|
305 | }
|
---|
306 |
|
---|
307 | static int
|
---|
308 | add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
|
---|
309 | {
|
---|
310 | int r;
|
---|
311 |
|
---|
312 | r = add_opcode(reg, opcode);
|
---|
313 | if (r) return r;
|
---|
314 | r = add_option(reg, option);
|
---|
315 | return r;
|
---|
316 | }
|
---|
317 |
|
---|
318 | static int compile_length_tree(Node* node, regex_t* reg);
|
---|
319 | static int compile_tree(Node* node, regex_t* reg);
|
---|
320 |
|
---|
321 |
|
---|
322 | #define IS_NEED_STR_LEN_OP_EXACT(op) \
|
---|
323 | ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
|
---|
324 | (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
|
---|
325 |
|
---|
326 | static int
|
---|
327 | select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
|
---|
328 | {
|
---|
329 | int op;
|
---|
330 | OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
|
---|
331 |
|
---|
332 | if (ignore_case) {
|
---|
333 | switch (str_len) {
|
---|
334 | case 1: op = OP_EXACT1_IC; break;
|
---|
335 | default: op = OP_EXACTN_IC; break;
|
---|
336 | }
|
---|
337 | }
|
---|
338 | else {
|
---|
339 | switch (mb_len) {
|
---|
340 | case 1:
|
---|
341 | switch (str_len) {
|
---|
342 | case 1: op = OP_EXACT1; break;
|
---|
343 | case 2: op = OP_EXACT2; break;
|
---|
344 | case 3: op = OP_EXACT3; break;
|
---|
345 | case 4: op = OP_EXACT4; break;
|
---|
346 | case 5: op = OP_EXACT5; break;
|
---|
347 | default: op = OP_EXACTN; break;
|
---|
348 | }
|
---|
349 | break;
|
---|
350 |
|
---|
351 | case 2:
|
---|
352 | switch (str_len) {
|
---|
353 | case 1: op = OP_EXACTMB2N1; break;
|
---|
354 | case 2: op = OP_EXACTMB2N2; break;
|
---|
355 | case 3: op = OP_EXACTMB2N3; break;
|
---|
356 | default: op = OP_EXACTMB2N; break;
|
---|
357 | }
|
---|
358 | break;
|
---|
359 |
|
---|
360 | case 3:
|
---|
361 | op = OP_EXACTMB3N;
|
---|
362 | break;
|
---|
363 |
|
---|
364 | default:
|
---|
365 | op = OP_EXACTMBN;
|
---|
366 | break;
|
---|
367 | }
|
---|
368 | }
|
---|
369 | return op;
|
---|
370 | }
|
---|
371 |
|
---|
372 | static int
|
---|
373 | compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
|
---|
374 | {
|
---|
375 | int r;
|
---|
376 | int saved_num_null_check = reg->num_null_check;
|
---|
377 |
|
---|
378 | if (empty_info != 0) {
|
---|
379 | r = add_opcode(reg, OP_NULL_CHECK_START);
|
---|
380 | if (r) return r;
|
---|
381 | r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
|
---|
382 | if (r) return r;
|
---|
383 | reg->num_null_check++;
|
---|
384 | }
|
---|
385 |
|
---|
386 | r = compile_tree(node, reg);
|
---|
387 | if (r) return r;
|
---|
388 |
|
---|
389 | if (empty_info != 0) {
|
---|
390 | if (empty_info == NQ_TARGET_IS_EMPTY)
|
---|
391 | r = add_opcode(reg, OP_NULL_CHECK_END);
|
---|
392 | else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
|
---|
393 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
|
---|
394 | else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
|
---|
395 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
|
---|
396 |
|
---|
397 | if (r) return r;
|
---|
398 | r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
|
---|
399 | }
|
---|
400 | return r;
|
---|
401 | }
|
---|
402 |
|
---|
403 | #ifdef USE_SUBEXP_CALL
|
---|
404 | static int
|
---|
405 | compile_call(CallNode* node, regex_t* reg)
|
---|
406 | {
|
---|
407 | int r;
|
---|
408 |
|
---|
409 | r = add_opcode(reg, OP_CALL);
|
---|
410 | if (r) return r;
|
---|
411 | r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
|
---|
412 | node->target);
|
---|
413 | if (r) return r;
|
---|
414 | r = add_abs_addr(reg, 0 /*dummy addr.*/);
|
---|
415 | return r;
|
---|
416 | }
|
---|
417 | #endif
|
---|
418 |
|
---|
419 | static int
|
---|
420 | compile_tree_n_times(Node* node, int n, regex_t* reg)
|
---|
421 | {
|
---|
422 | int i, r;
|
---|
423 |
|
---|
424 | for (i = 0; i < n; i++) {
|
---|
425 | r = compile_tree(node, reg);
|
---|
426 | if (r) return r;
|
---|
427 | }
|
---|
428 | return 0;
|
---|
429 | }
|
---|
430 |
|
---|
431 | static int
|
---|
432 | add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
|
---|
433 | regex_t* reg ARG_UNUSED, int ignore_case)
|
---|
434 | {
|
---|
435 | int len;
|
---|
436 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
437 |
|
---|
438 | len = SIZE_OPCODE;
|
---|
439 |
|
---|
440 | if (op == OP_EXACTMBN) len += SIZE_LENGTH;
|
---|
441 | if (IS_NEED_STR_LEN_OP_EXACT(op))
|
---|
442 | len += SIZE_LENGTH;
|
---|
443 |
|
---|
444 | len += (int )byte_len;
|
---|
445 | return len;
|
---|
446 | }
|
---|
447 |
|
---|
448 | static int
|
---|
449 | add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
|
---|
450 | regex_t* reg, int ignore_case)
|
---|
451 | {
|
---|
452 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
453 | add_opcode(reg, op);
|
---|
454 |
|
---|
455 | if (op == OP_EXACTMBN)
|
---|
456 | add_length(reg, mb_len);
|
---|
457 |
|
---|
458 | if (IS_NEED_STR_LEN_OP_EXACT(op)) {
|
---|
459 | if (op == OP_EXACTN_IC)
|
---|
460 | add_length(reg, byte_len);
|
---|
461 | else
|
---|
462 | add_length(reg, byte_len / mb_len);
|
---|
463 | }
|
---|
464 |
|
---|
465 | add_bytes(reg, s, byte_len);
|
---|
466 | return 0;
|
---|
467 | }
|
---|
468 |
|
---|
469 |
|
---|
470 | static int
|
---|
471 | compile_length_string_node(Node* node, regex_t* reg)
|
---|
472 | {
|
---|
473 | int rlen, r, len, prev_len, blen, ambig;
|
---|
474 | OnigEncoding enc = reg->enc;
|
---|
475 | UChar *p, *prev;
|
---|
476 | StrNode* sn;
|
---|
477 |
|
---|
478 | sn = NSTR(node);
|
---|
479 | if (sn->end <= sn->s)
|
---|
480 | return 0;
|
---|
481 |
|
---|
482 | ambig = NSTRING_IS_AMBIG(node);
|
---|
483 |
|
---|
484 | p = prev = sn->s;
|
---|
485 | prev_len = enclen(enc, p);
|
---|
486 | p += prev_len;
|
---|
487 | blen = prev_len;
|
---|
488 | rlen = 0;
|
---|
489 |
|
---|
490 | for (; p < sn->end; ) {
|
---|
491 | len = enclen(enc, p);
|
---|
492 | if (len == prev_len || ambig) {
|
---|
493 | blen += len;
|
---|
494 | }
|
---|
495 | else {
|
---|
496 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
497 | rlen += r;
|
---|
498 | prev = p;
|
---|
499 | blen = len;
|
---|
500 | prev_len = len;
|
---|
501 | }
|
---|
502 | p += len;
|
---|
503 | }
|
---|
504 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
505 | rlen += r;
|
---|
506 | return rlen;
|
---|
507 | }
|
---|
508 |
|
---|
509 | static int
|
---|
510 | compile_length_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
511 | {
|
---|
512 | if (sn->end <= sn->s)
|
---|
513 | return 0;
|
---|
514 |
|
---|
515 | return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
516 | }
|
---|
517 |
|
---|
518 | static int
|
---|
519 | compile_string_node(Node* node, regex_t* reg)
|
---|
520 | {
|
---|
521 | int r, len, prev_len, blen, ambig;
|
---|
522 | OnigEncoding enc = reg->enc;
|
---|
523 | UChar *p, *prev, *end;
|
---|
524 | StrNode* sn;
|
---|
525 |
|
---|
526 | sn = NSTR(node);
|
---|
527 | if (sn->end <= sn->s)
|
---|
528 | return 0;
|
---|
529 |
|
---|
530 | end = sn->end;
|
---|
531 | ambig = NSTRING_IS_AMBIG(node);
|
---|
532 |
|
---|
533 | p = prev = sn->s;
|
---|
534 | prev_len = enclen(enc, p);
|
---|
535 | p += prev_len;
|
---|
536 | blen = prev_len;
|
---|
537 |
|
---|
538 | for (; p < end; ) {
|
---|
539 | len = enclen(enc, p);
|
---|
540 | if (len == prev_len || ambig) {
|
---|
541 | blen += len;
|
---|
542 | }
|
---|
543 | else {
|
---|
544 | r = add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
545 | if (r) return r;
|
---|
546 |
|
---|
547 | prev = p;
|
---|
548 | blen = len;
|
---|
549 | prev_len = len;
|
---|
550 | }
|
---|
551 |
|
---|
552 | p += len;
|
---|
553 | }
|
---|
554 | return add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
555 | }
|
---|
556 |
|
---|
557 | static int
|
---|
558 | compile_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
559 | {
|
---|
560 | if (sn->end <= sn->s)
|
---|
561 | return 0;
|
---|
562 |
|
---|
563 | return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
564 | }
|
---|
565 |
|
---|
566 | static int
|
---|
567 | add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
|
---|
568 | {
|
---|
569 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
570 | add_length(reg, mbuf->used);
|
---|
571 | return add_bytes(reg, mbuf->p, mbuf->used);
|
---|
572 | #else
|
---|
573 | int r, pad_size;
|
---|
574 | UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
|
---|
575 |
|
---|
576 | GET_ALIGNMENT_PAD_SIZE(p, pad_size);
|
---|
577 | add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
|
---|
578 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
579 |
|
---|
580 | r = add_bytes(reg, mbuf->p, mbuf->used);
|
---|
581 |
|
---|
582 | /* padding for return value from compile_length_cclass_node() to be fix. */
|
---|
583 | pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
|
---|
584 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
585 | return r;
|
---|
586 | #endif
|
---|
587 | }
|
---|
588 |
|
---|
589 | static int
|
---|
590 | compile_length_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
591 | {
|
---|
592 | int len;
|
---|
593 |
|
---|
594 | if (IS_NCCLASS_SHARE(cc)) {
|
---|
595 | len = SIZE_OPCODE + SIZE_POINTER;
|
---|
596 | return len;
|
---|
597 | }
|
---|
598 |
|
---|
599 | if (IS_NULL(cc->mbuf)) {
|
---|
600 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
601 | }
|
---|
602 | else {
|
---|
603 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
604 | len = SIZE_OPCODE;
|
---|
605 | }
|
---|
606 | else {
|
---|
607 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
608 | }
|
---|
609 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
610 | len += SIZE_LENGTH + cc->mbuf->used;
|
---|
611 | #else
|
---|
612 | len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
|
---|
613 | #endif
|
---|
614 | }
|
---|
615 |
|
---|
616 | return len;
|
---|
617 | }
|
---|
618 |
|
---|
619 | static int
|
---|
620 | compile_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
621 | {
|
---|
622 | int r;
|
---|
623 |
|
---|
624 | if (IS_NCCLASS_SHARE(cc)) {
|
---|
625 | add_opcode(reg, OP_CCLASS_NODE);
|
---|
626 | r = add_pointer(reg, cc);
|
---|
627 | return r;
|
---|
628 | }
|
---|
629 |
|
---|
630 | if (IS_NULL(cc->mbuf)) {
|
---|
631 | if (IS_NCCLASS_NOT(cc))
|
---|
632 | add_opcode(reg, OP_CCLASS_NOT);
|
---|
633 | else
|
---|
634 | add_opcode(reg, OP_CCLASS);
|
---|
635 |
|
---|
636 | r = add_bitset(reg, cc->bs);
|
---|
637 | }
|
---|
638 | else {
|
---|
639 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
640 | if (IS_NCCLASS_NOT(cc))
|
---|
641 | add_opcode(reg, OP_CCLASS_MB_NOT);
|
---|
642 | else
|
---|
643 | add_opcode(reg, OP_CCLASS_MB);
|
---|
644 |
|
---|
645 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
646 | }
|
---|
647 | else {
|
---|
648 | if (IS_NCCLASS_NOT(cc))
|
---|
649 | add_opcode(reg, OP_CCLASS_MIX_NOT);
|
---|
650 | else
|
---|
651 | add_opcode(reg, OP_CCLASS_MIX);
|
---|
652 |
|
---|
653 | r = add_bitset(reg, cc->bs);
|
---|
654 | if (r) return r;
|
---|
655 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
656 | }
|
---|
657 | }
|
---|
658 |
|
---|
659 | return r;
|
---|
660 | }
|
---|
661 |
|
---|
662 | static int
|
---|
663 | entry_repeat_range(regex_t* reg, int id, int lower, int upper)
|
---|
664 | {
|
---|
665 | #define REPEAT_RANGE_ALLOC 4
|
---|
666 |
|
---|
667 | OnigRepeatRange* p;
|
---|
668 |
|
---|
669 | if (reg->repeat_range_alloc == 0) {
|
---|
670 | p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
|
---|
671 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
672 | reg->repeat_range = p;
|
---|
673 | reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
|
---|
674 | }
|
---|
675 | else if (reg->repeat_range_alloc <= id) {
|
---|
676 | int n;
|
---|
677 | n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
|
---|
678 | p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
|
---|
679 | sizeof(OnigRepeatRange) * n);
|
---|
680 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
681 | reg->repeat_range = p;
|
---|
682 | reg->repeat_range_alloc = n;
|
---|
683 | }
|
---|
684 | else {
|
---|
685 | p = reg->repeat_range;
|
---|
686 | }
|
---|
687 |
|
---|
688 | p[id].lower = lower;
|
---|
689 | p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
|
---|
690 | return 0;
|
---|
691 | }
|
---|
692 |
|
---|
693 | static int
|
---|
694 | compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
|
---|
695 | regex_t* reg)
|
---|
696 | {
|
---|
697 | int r;
|
---|
698 | int num_repeat = reg->num_repeat;
|
---|
699 |
|
---|
700 | r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
|
---|
701 | if (r) return r;
|
---|
702 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
703 | reg->num_repeat++;
|
---|
704 | if (r) return r;
|
---|
705 | r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
|
---|
706 | if (r) return r;
|
---|
707 |
|
---|
708 | r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
|
---|
709 | if (r) return r;
|
---|
710 |
|
---|
711 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
712 | if (r) return r;
|
---|
713 |
|
---|
714 | if (
|
---|
715 | #ifdef USE_SUBEXP_CALL
|
---|
716 | reg->num_call > 0 ||
|
---|
717 | #endif
|
---|
718 | IS_QUANTIFIER_IN_REPEAT(qn)) {
|
---|
719 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
|
---|
720 | }
|
---|
721 | else {
|
---|
722 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
|
---|
723 | }
|
---|
724 | if (r) return r;
|
---|
725 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
726 | return r;
|
---|
727 | }
|
---|
728 |
|
---|
729 | static int
|
---|
730 | is_anychar_star_quantifier(QtfrNode* qn)
|
---|
731 | {
|
---|
732 | if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
---|
733 | NTYPE(qn->target) == NT_CANY)
|
---|
734 | return 1;
|
---|
735 | else
|
---|
736 | return 0;
|
---|
737 | }
|
---|
738 |
|
---|
739 | #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
|
---|
740 | #define CKN_ON (ckn > 0)
|
---|
741 |
|
---|
742 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
743 |
|
---|
744 | static int
|
---|
745 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
746 | {
|
---|
747 | int len, mod_tlen, cklen;
|
---|
748 | int ckn;
|
---|
749 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
750 | int empty_info = qn->target_empty_info;
|
---|
751 | int tlen = compile_length_tree(qn->target, reg);
|
---|
752 |
|
---|
753 | if (tlen < 0) return tlen;
|
---|
754 |
|
---|
755 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
756 |
|
---|
757 | cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
|
---|
758 |
|
---|
759 | /* anychar repeat */
|
---|
760 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
761 | if (qn->greedy && infinite) {
|
---|
762 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
|
---|
763 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
|
---|
764 | else
|
---|
765 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
|
---|
766 | }
|
---|
767 | }
|
---|
768 |
|
---|
769 | if (empty_info != 0)
|
---|
770 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
771 | else
|
---|
772 | mod_tlen = tlen;
|
---|
773 |
|
---|
774 | if (infinite && qn->lower <= 1) {
|
---|
775 | if (qn->greedy) {
|
---|
776 | if (qn->lower == 1)
|
---|
777 | len = SIZE_OP_JUMP;
|
---|
778 | else
|
---|
779 | len = 0;
|
---|
780 |
|
---|
781 | len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
|
---|
782 | }
|
---|
783 | else {
|
---|
784 | if (qn->lower == 0)
|
---|
785 | len = SIZE_OP_JUMP;
|
---|
786 | else
|
---|
787 | len = 0;
|
---|
788 |
|
---|
789 | len += mod_tlen + SIZE_OP_PUSH + cklen;
|
---|
790 | }
|
---|
791 | }
|
---|
792 | else if (qn->upper == 0) {
|
---|
793 | if (qn->is_refered != 0) /* /(?<n>..){0}/ */
|
---|
794 | len = SIZE_OP_JUMP + tlen;
|
---|
795 | else
|
---|
796 | len = 0;
|
---|
797 | }
|
---|
798 | else if (qn->upper == 1 && qn->greedy) {
|
---|
799 | if (qn->lower == 0) {
|
---|
800 | if (CKN_ON) {
|
---|
801 | len = SIZE_OP_STATE_CHECK_PUSH + tlen;
|
---|
802 | }
|
---|
803 | else {
|
---|
804 | len = SIZE_OP_PUSH + tlen;
|
---|
805 | }
|
---|
806 | }
|
---|
807 | else {
|
---|
808 | len = tlen;
|
---|
809 | }
|
---|
810 | }
|
---|
811 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
812 | len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
|
---|
813 | }
|
---|
814 | else {
|
---|
815 | len = SIZE_OP_REPEAT_INC
|
---|
816 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
817 | if (CKN_ON)
|
---|
818 | len += SIZE_OP_STATE_CHECK;
|
---|
819 | }
|
---|
820 |
|
---|
821 | return len;
|
---|
822 | }
|
---|
823 |
|
---|
824 | static int
|
---|
825 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
826 | {
|
---|
827 | int r, mod_tlen;
|
---|
828 | int ckn;
|
---|
829 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
830 | int empty_info = qn->target_empty_info;
|
---|
831 | int tlen = compile_length_tree(qn->target, reg);
|
---|
832 |
|
---|
833 | if (tlen < 0) return tlen;
|
---|
834 |
|
---|
835 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
836 |
|
---|
837 | if (is_anychar_star_quantifier(qn)) {
|
---|
838 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
839 | if (r) return r;
|
---|
840 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
|
---|
841 | if (IS_MULTILINE(reg->options))
|
---|
842 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
843 | else
|
---|
844 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
845 | if (r) return r;
|
---|
846 | if (CKN_ON) {
|
---|
847 | r = add_state_check_num(reg, ckn);
|
---|
848 | if (r) return r;
|
---|
849 | }
|
---|
850 |
|
---|
851 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
852 | }
|
---|
853 | else {
|
---|
854 | if (IS_MULTILINE(reg->options)) {
|
---|
855 | r = add_opcode(reg, (CKN_ON ?
|
---|
856 | OP_STATE_CHECK_ANYCHAR_ML_STAR
|
---|
857 | : OP_ANYCHAR_ML_STAR));
|
---|
858 | }
|
---|
859 | else {
|
---|
860 | r = add_opcode(reg, (CKN_ON ?
|
---|
861 | OP_STATE_CHECK_ANYCHAR_STAR
|
---|
862 | : OP_ANYCHAR_STAR));
|
---|
863 | }
|
---|
864 | if (r) return r;
|
---|
865 | if (CKN_ON)
|
---|
866 | r = add_state_check_num(reg, ckn);
|
---|
867 |
|
---|
868 | return r;
|
---|
869 | }
|
---|
870 | }
|
---|
871 |
|
---|
872 | if (empty_info != 0)
|
---|
873 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
874 | else
|
---|
875 | mod_tlen = tlen;
|
---|
876 |
|
---|
877 | if (infinite && qn->lower <= 1) {
|
---|
878 | if (qn->greedy) {
|
---|
879 | if (qn->lower == 1) {
|
---|
880 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
881 | (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
|
---|
882 | if (r) return r;
|
---|
883 | }
|
---|
884 |
|
---|
885 | if (CKN_ON) {
|
---|
886 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
887 | if (r) return r;
|
---|
888 | r = add_state_check_num(reg, ckn);
|
---|
889 | if (r) return r;
|
---|
890 | r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
|
---|
891 | }
|
---|
892 | else {
|
---|
893 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
894 | }
|
---|
895 | if (r) return r;
|
---|
896 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
897 | if (r) return r;
|
---|
898 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
899 | -(mod_tlen + (int )SIZE_OP_JUMP
|
---|
900 | + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
|
---|
901 | }
|
---|
902 | else {
|
---|
903 | if (qn->lower == 0) {
|
---|
904 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
905 | if (r) return r;
|
---|
906 | }
|
---|
907 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
908 | if (r) return r;
|
---|
909 | if (CKN_ON) {
|
---|
910 | r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
|
---|
911 | if (r) return r;
|
---|
912 | r = add_state_check_num(reg, ckn);
|
---|
913 | if (r) return r;
|
---|
914 | r = add_rel_addr(reg,
|
---|
915 | -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
|
---|
916 | }
|
---|
917 | else
|
---|
918 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
919 | }
|
---|
920 | }
|
---|
921 | else if (qn->upper == 0) {
|
---|
922 | if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
923 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
924 | if (r) return r;
|
---|
925 | r = compile_tree(qn->target, reg);
|
---|
926 | }
|
---|
927 | else
|
---|
928 | r = 0;
|
---|
929 | }
|
---|
930 | else if (qn->upper == 1 && qn->greedy) {
|
---|
931 | if (qn->lower == 0) {
|
---|
932 | if (CKN_ON) {
|
---|
933 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
934 | if (r) return r;
|
---|
935 | r = add_state_check_num(reg, ckn);
|
---|
936 | if (r) return r;
|
---|
937 | r = add_rel_addr(reg, tlen);
|
---|
938 | }
|
---|
939 | else {
|
---|
940 | r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
|
---|
941 | }
|
---|
942 | if (r) return r;
|
---|
943 | }
|
---|
944 |
|
---|
945 | r = compile_tree(qn->target, reg);
|
---|
946 | }
|
---|
947 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
948 | if (CKN_ON) {
|
---|
949 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
950 | if (r) return r;
|
---|
951 | r = add_state_check_num(reg, ckn);
|
---|
952 | if (r) return r;
|
---|
953 | r = add_rel_addr(reg, SIZE_OP_JUMP);
|
---|
954 | }
|
---|
955 | else {
|
---|
956 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
957 | }
|
---|
958 |
|
---|
959 | if (r) return r;
|
---|
960 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
961 | if (r) return r;
|
---|
962 | r = compile_tree(qn->target, reg);
|
---|
963 | }
|
---|
964 | else {
|
---|
965 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
966 | if (CKN_ON) {
|
---|
967 | if (r) return r;
|
---|
968 | r = add_opcode(reg, OP_STATE_CHECK);
|
---|
969 | if (r) return r;
|
---|
970 | r = add_state_check_num(reg, ckn);
|
---|
971 | }
|
---|
972 | }
|
---|
973 | return r;
|
---|
974 | }
|
---|
975 |
|
---|
976 | #else /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
977 |
|
---|
978 | static int
|
---|
979 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
980 | {
|
---|
981 | int len, mod_tlen;
|
---|
982 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
983 | int empty_info = qn->target_empty_info;
|
---|
984 | int tlen = compile_length_tree(qn->target, reg);
|
---|
985 |
|
---|
986 | if (tlen < 0) return tlen;
|
---|
987 |
|
---|
988 | /* anychar repeat */
|
---|
989 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
990 | if (qn->greedy && infinite) {
|
---|
991 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
992 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
|
---|
993 | else
|
---|
994 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
|
---|
995 | }
|
---|
996 | }
|
---|
997 |
|
---|
998 | if (empty_info != 0)
|
---|
999 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
1000 | else
|
---|
1001 | mod_tlen = tlen;
|
---|
1002 |
|
---|
1003 | if (infinite &&
|
---|
1004 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
1005 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
1006 | len = SIZE_OP_JUMP;
|
---|
1007 | }
|
---|
1008 | else {
|
---|
1009 | len = tlen * qn->lower;
|
---|
1010 | }
|
---|
1011 |
|
---|
1012 | if (qn->greedy) {
|
---|
1013 | if (IS_NOT_NULL(qn->head_exact))
|
---|
1014 | len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
|
---|
1015 | else if (IS_NOT_NULL(qn->next_head_exact))
|
---|
1016 | len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
|
---|
1017 | else
|
---|
1018 | len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
|
---|
1019 | }
|
---|
1020 | else
|
---|
1021 | len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
|
---|
1022 | }
|
---|
1023 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
1024 | len = SIZE_OP_JUMP + tlen;
|
---|
1025 | }
|
---|
1026 | else if (!infinite && qn->greedy &&
|
---|
1027 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
1028 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
1029 | len = tlen * qn->lower;
|
---|
1030 | len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
|
---|
1031 | }
|
---|
1032 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
1033 | len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
|
---|
1034 | }
|
---|
1035 | else {
|
---|
1036 | len = SIZE_OP_REPEAT_INC
|
---|
1037 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
1038 | }
|
---|
1039 |
|
---|
1040 | return len;
|
---|
1041 | }
|
---|
1042 |
|
---|
1043 | static int
|
---|
1044 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
1045 | {
|
---|
1046 | int i, r, mod_tlen;
|
---|
1047 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
1048 | int empty_info = qn->target_empty_info;
|
---|
1049 | int tlen = compile_length_tree(qn->target, reg);
|
---|
1050 |
|
---|
1051 | if (tlen < 0) return tlen;
|
---|
1052 |
|
---|
1053 | if (is_anychar_star_quantifier(qn)) {
|
---|
1054 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1055 | if (r) return r;
|
---|
1056 | if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
1057 | if (IS_MULTILINE(reg->options))
|
---|
1058 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
1059 | else
|
---|
1060 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
1061 | if (r) return r;
|
---|
1062 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
1063 | }
|
---|
1064 | else {
|
---|
1065 | if (IS_MULTILINE(reg->options))
|
---|
1066 | return add_opcode(reg, OP_ANYCHAR_ML_STAR);
|
---|
1067 | else
|
---|
1068 | return add_opcode(reg, OP_ANYCHAR_STAR);
|
---|
1069 | }
|
---|
1070 | }
|
---|
1071 |
|
---|
1072 | if (empty_info != 0)
|
---|
1073 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
1074 | else
|
---|
1075 | mod_tlen = tlen;
|
---|
1076 |
|
---|
1077 | if (infinite &&
|
---|
1078 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
1079 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
1080 | if (qn->greedy) {
|
---|
1081 | if (IS_NOT_NULL(qn->head_exact))
|
---|
1082 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
|
---|
1083 | else if (IS_NOT_NULL(qn->next_head_exact))
|
---|
1084 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
|
---|
1085 | else
|
---|
1086 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
|
---|
1087 | }
|
---|
1088 | else {
|
---|
1089 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
|
---|
1090 | }
|
---|
1091 | if (r) return r;
|
---|
1092 | }
|
---|
1093 | else {
|
---|
1094 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1095 | if (r) return r;
|
---|
1096 | }
|
---|
1097 |
|
---|
1098 | if (qn->greedy) {
|
---|
1099 | if (IS_NOT_NULL(qn->head_exact)) {
|
---|
1100 | r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
|
---|
1101 | mod_tlen + SIZE_OP_JUMP);
|
---|
1102 | if (r) return r;
|
---|
1103 | add_bytes(reg, NSTR(qn->head_exact)->s, 1);
|
---|
1104 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1105 | if (r) return r;
|
---|
1106 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1107 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
|
---|
1108 | }
|
---|
1109 | else if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
1110 | r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
|
---|
1111 | mod_tlen + SIZE_OP_JUMP);
|
---|
1112 | if (r) return r;
|
---|
1113 | add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
1114 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1115 | if (r) return r;
|
---|
1116 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1117 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
|
---|
1118 | }
|
---|
1119 | else {
|
---|
1120 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
1121 | if (r) return r;
|
---|
1122 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1123 | if (r) return r;
|
---|
1124 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1125 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
|
---|
1126 | }
|
---|
1127 | }
|
---|
1128 | else {
|
---|
1129 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
1130 | if (r) return r;
|
---|
1131 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
1132 | if (r) return r;
|
---|
1133 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
1134 | }
|
---|
1135 | }
|
---|
1136 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
1137 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
1138 | if (r) return r;
|
---|
1139 | r = compile_tree(qn->target, reg);
|
---|
1140 | }
|
---|
1141 | else if (!infinite && qn->greedy &&
|
---|
1142 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
1143 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
1144 | int n = qn->upper - qn->lower;
|
---|
1145 |
|
---|
1146 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1147 | if (r) return r;
|
---|
1148 |
|
---|
1149 | for (i = 0; i < n; i++) {
|
---|
1150 | r = add_opcode_rel_addr(reg, OP_PUSH,
|
---|
1151 | (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
|
---|
1152 | if (r) return r;
|
---|
1153 | r = compile_tree(qn->target, reg);
|
---|
1154 | if (r) return r;
|
---|
1155 | }
|
---|
1156 | }
|
---|
1157 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
1158 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
1159 | if (r) return r;
|
---|
1160 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
1161 | if (r) return r;
|
---|
1162 | r = compile_tree(qn->target, reg);
|
---|
1163 | }
|
---|
1164 | else {
|
---|
1165 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
1166 | }
|
---|
1167 | return r;
|
---|
1168 | }
|
---|
1169 | #endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
1170 |
|
---|
1171 | static int
|
---|
1172 | compile_length_option_node(EncloseNode* node, regex_t* reg)
|
---|
1173 | {
|
---|
1174 | int tlen;
|
---|
1175 | OnigOptionType prev = reg->options;
|
---|
1176 |
|
---|
1177 | reg->options = node->option;
|
---|
1178 | tlen = compile_length_tree(node->target, reg);
|
---|
1179 | reg->options = prev;
|
---|
1180 |
|
---|
1181 | if (tlen < 0) return tlen;
|
---|
1182 |
|
---|
1183 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
1184 | return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
|
---|
1185 | + tlen + SIZE_OP_SET_OPTION;
|
---|
1186 | }
|
---|
1187 | else
|
---|
1188 | return tlen;
|
---|
1189 | }
|
---|
1190 |
|
---|
1191 | static int
|
---|
1192 | compile_option_node(EncloseNode* node, regex_t* reg)
|
---|
1193 | {
|
---|
1194 | int r;
|
---|
1195 | OnigOptionType prev = reg->options;
|
---|
1196 |
|
---|
1197 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
1198 | r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
|
---|
1199 | if (r) return r;
|
---|
1200 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
1201 | if (r) return r;
|
---|
1202 | r = add_opcode(reg, OP_FAIL);
|
---|
1203 | if (r) return r;
|
---|
1204 | }
|
---|
1205 |
|
---|
1206 | reg->options = node->option;
|
---|
1207 | r = compile_tree(node->target, reg);
|
---|
1208 | reg->options = prev;
|
---|
1209 |
|
---|
1210 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
1211 | if (r) return r;
|
---|
1212 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
1213 | }
|
---|
1214 | return r;
|
---|
1215 | }
|
---|
1216 |
|
---|
1217 | static int
|
---|
1218 | compile_length_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
1219 | {
|
---|
1220 | int len;
|
---|
1221 | int tlen;
|
---|
1222 |
|
---|
1223 | if (node->type == ENCLOSE_OPTION)
|
---|
1224 | return compile_length_option_node(node, reg);
|
---|
1225 |
|
---|
1226 | if (node->target) {
|
---|
1227 | tlen = compile_length_tree(node->target, reg);
|
---|
1228 | if (tlen < 0) return tlen;
|
---|
1229 | }
|
---|
1230 | else
|
---|
1231 | tlen = 0;
|
---|
1232 |
|
---|
1233 | switch (node->type) {
|
---|
1234 | case ENCLOSE_MEMORY:
|
---|
1235 | #ifdef USE_SUBEXP_CALL
|
---|
1236 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
1237 | len = SIZE_OP_MEMORY_START_PUSH + tlen
|
---|
1238 | + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
|
---|
1239 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1240 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1241 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
1242 | else
|
---|
1243 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1244 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
1245 | }
|
---|
1246 | else
|
---|
1247 | #endif
|
---|
1248 | {
|
---|
1249 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
1250 | len = SIZE_OP_MEMORY_START_PUSH;
|
---|
1251 | else
|
---|
1252 | len = SIZE_OP_MEMORY_START;
|
---|
1253 |
|
---|
1254 | len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
|
---|
1255 | ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
|
---|
1256 | }
|
---|
1257 | break;
|
---|
1258 |
|
---|
1259 | case ENCLOSE_STOP_BACKTRACK:
|
---|
1260 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
1261 | QtfrNode* qn = NQTFR(node->target);
|
---|
1262 | tlen = compile_length_tree(qn->target, reg);
|
---|
1263 | if (tlen < 0) return tlen;
|
---|
1264 |
|
---|
1265 | len = tlen * qn->lower
|
---|
1266 | + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
|
---|
1267 | }
|
---|
1268 | else {
|
---|
1269 | len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
|
---|
1270 | }
|
---|
1271 | break;
|
---|
1272 |
|
---|
1273 | case ENCLOSE_CONDITION:
|
---|
1274 | len = SIZE_OP_CONDITION;
|
---|
1275 | if (NTYPE(node->target) == NT_ALT) {
|
---|
1276 | Node* x = node->target;
|
---|
1277 |
|
---|
1278 | tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
1279 | if (tlen < 0) return tlen;
|
---|
1280 | len += tlen + SIZE_OP_JUMP;
|
---|
1281 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
1282 | x = NCDR(x);
|
---|
1283 | tlen = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
1284 | if (tlen < 0) return tlen;
|
---|
1285 | len += tlen;
|
---|
1286 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
1287 | }
|
---|
1288 | else {
|
---|
1289 | return ONIGERR_PARSER_BUG;
|
---|
1290 | }
|
---|
1291 | break;
|
---|
1292 |
|
---|
1293 | default:
|
---|
1294 | return ONIGERR_TYPE_BUG;
|
---|
1295 | break;
|
---|
1296 | }
|
---|
1297 |
|
---|
1298 | return len;
|
---|
1299 | }
|
---|
1300 |
|
---|
1301 | static int get_char_length_tree(Node* node, regex_t* reg, int* len);
|
---|
1302 |
|
---|
1303 | static int
|
---|
1304 | compile_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
1305 | {
|
---|
1306 | int r, len;
|
---|
1307 |
|
---|
1308 | if (node->type == ENCLOSE_OPTION)
|
---|
1309 | return compile_option_node(node, reg);
|
---|
1310 |
|
---|
1311 | switch (node->type) {
|
---|
1312 | case ENCLOSE_MEMORY:
|
---|
1313 | #ifdef USE_SUBEXP_CALL
|
---|
1314 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
1315 | r = add_opcode(reg, OP_CALL);
|
---|
1316 | if (r) return r;
|
---|
1317 | node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
|
---|
1318 | node->state |= NST_ADDR_FIXED;
|
---|
1319 | r = add_abs_addr(reg, (int )node->call_addr);
|
---|
1320 | if (r) return r;
|
---|
1321 | len = compile_length_tree(node->target, reg);
|
---|
1322 | len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
|
---|
1323 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1324 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1325 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
1326 | else
|
---|
1327 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
1328 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
1329 |
|
---|
1330 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
1331 | if (r) return r;
|
---|
1332 | }
|
---|
1333 | #endif
|
---|
1334 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
1335 | r = add_opcode(reg, OP_MEMORY_START_PUSH);
|
---|
1336 | else
|
---|
1337 | r = add_opcode(reg, OP_MEMORY_START);
|
---|
1338 | if (r) return r;
|
---|
1339 | r = add_mem_num(reg, node->regnum);
|
---|
1340 | if (r) return r;
|
---|
1341 | r = compile_tree(node->target, reg);
|
---|
1342 | if (r) return r;
|
---|
1343 | #ifdef USE_SUBEXP_CALL
|
---|
1344 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
1345 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1346 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
1347 | ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
|
---|
1348 | else
|
---|
1349 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
1350 | ? OP_MEMORY_END_REC : OP_MEMORY_END));
|
---|
1351 |
|
---|
1352 | if (r) return r;
|
---|
1353 | r = add_mem_num(reg, node->regnum);
|
---|
1354 | if (r) return r;
|
---|
1355 | r = add_opcode(reg, OP_RETURN);
|
---|
1356 | }
|
---|
1357 | else
|
---|
1358 | #endif
|
---|
1359 | {
|
---|
1360 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
1361 | r = add_opcode(reg, OP_MEMORY_END_PUSH);
|
---|
1362 | else
|
---|
1363 | r = add_opcode(reg, OP_MEMORY_END);
|
---|
1364 | if (r) return r;
|
---|
1365 | r = add_mem_num(reg, node->regnum);
|
---|
1366 | }
|
---|
1367 | break;
|
---|
1368 |
|
---|
1369 | case ENCLOSE_STOP_BACKTRACK:
|
---|
1370 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
1371 | QtfrNode* qn = NQTFR(node->target);
|
---|
1372 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
1373 | if (r) return r;
|
---|
1374 |
|
---|
1375 | len = compile_length_tree(qn->target, reg);
|
---|
1376 | if (len < 0) return len;
|
---|
1377 |
|
---|
1378 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
|
---|
1379 | if (r) return r;
|
---|
1380 | r = compile_tree(qn->target, reg);
|
---|
1381 | if (r) return r;
|
---|
1382 | r = add_opcode(reg, OP_POP);
|
---|
1383 | if (r) return r;
|
---|
1384 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
1385 | -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
|
---|
1386 | }
|
---|
1387 | else {
|
---|
1388 | r = add_opcode(reg, OP_PUSH_STOP_BT);
|
---|
1389 | if (r) return r;
|
---|
1390 | r = compile_tree(node->target, reg);
|
---|
1391 | if (r) return r;
|
---|
1392 | r = add_opcode(reg, OP_POP_STOP_BT);
|
---|
1393 | }
|
---|
1394 | break;
|
---|
1395 |
|
---|
1396 | case ENCLOSE_CONDITION:
|
---|
1397 | r = add_opcode(reg, OP_CONDITION);
|
---|
1398 | if (r) return r;
|
---|
1399 | r = add_mem_num(reg, node->regnum);
|
---|
1400 | if (r) return r;
|
---|
1401 |
|
---|
1402 | if (NTYPE(node->target) == NT_ALT) {
|
---|
1403 | Node* x = node->target;
|
---|
1404 | int len2;
|
---|
1405 |
|
---|
1406 | len = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
1407 | if (len < 0) return len;
|
---|
1408 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
1409 | x = NCDR(x);
|
---|
1410 | len2 = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
1411 | if (len2 < 0) return len2;
|
---|
1412 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
1413 |
|
---|
1414 | x = node->target;
|
---|
1415 | r = add_rel_addr(reg, len + SIZE_OP_JUMP);
|
---|
1416 | if (r) return r;
|
---|
1417 | r = compile_tree(NCAR(x), reg); /* yes-node */
|
---|
1418 | if (r) return r;
|
---|
1419 | r = add_opcode_rel_addr(reg, OP_JUMP, len2);
|
---|
1420 | if (r) return r;
|
---|
1421 | x = NCDR(x);
|
---|
1422 | r = compile_tree(NCAR(x), reg); /* no-node */
|
---|
1423 | }
|
---|
1424 | else {
|
---|
1425 | return ONIGERR_PARSER_BUG;
|
---|
1426 | }
|
---|
1427 | break;
|
---|
1428 |
|
---|
1429 | default:
|
---|
1430 | return ONIGERR_TYPE_BUG;
|
---|
1431 | break;
|
---|
1432 | }
|
---|
1433 |
|
---|
1434 | return r;
|
---|
1435 | }
|
---|
1436 |
|
---|
1437 | static int
|
---|
1438 | compile_length_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
1439 | {
|
---|
1440 | int len;
|
---|
1441 | int tlen = 0;
|
---|
1442 |
|
---|
1443 | if (node->target) {
|
---|
1444 | tlen = compile_length_tree(node->target, reg);
|
---|
1445 | if (tlen < 0) return tlen;
|
---|
1446 | }
|
---|
1447 |
|
---|
1448 | switch (node->type) {
|
---|
1449 | case ANCHOR_PREC_READ:
|
---|
1450 | len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
|
---|
1451 | break;
|
---|
1452 | case ANCHOR_PREC_READ_NOT:
|
---|
1453 | len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
|
---|
1454 | break;
|
---|
1455 | case ANCHOR_LOOK_BEHIND:
|
---|
1456 | len = SIZE_OP_LOOK_BEHIND + tlen;
|
---|
1457 | break;
|
---|
1458 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
1459 | len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
|
---|
1460 | break;
|
---|
1461 |
|
---|
1462 | default:
|
---|
1463 | len = SIZE_OPCODE;
|
---|
1464 | break;
|
---|
1465 | }
|
---|
1466 |
|
---|
1467 | return len;
|
---|
1468 | }
|
---|
1469 |
|
---|
1470 | static int
|
---|
1471 | compile_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
1472 | {
|
---|
1473 | int r, len;
|
---|
1474 |
|
---|
1475 | switch (node->type) {
|
---|
1476 | case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
|
---|
1477 | case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
|
---|
1478 | case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
|
---|
1479 | case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
|
---|
1480 | case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
|
---|
1481 | case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
|
---|
1482 |
|
---|
1483 | /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
|
---|
1484 | case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
|
---|
1485 |
|
---|
1486 | case ANCHOR_WORD_BOUND:
|
---|
1487 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
|
---|
1488 | else r = add_opcode(reg, OP_WORD_BOUND);
|
---|
1489 | break;
|
---|
1490 | case ANCHOR_NOT_WORD_BOUND:
|
---|
1491 | if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
|
---|
1492 | else r = add_opcode(reg, OP_NOT_WORD_BOUND);
|
---|
1493 | break;
|
---|
1494 | #ifdef USE_WORD_BEGIN_END
|
---|
1495 | case ANCHOR_WORD_BEGIN:
|
---|
1496 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
|
---|
1497 | else r = add_opcode(reg, OP_WORD_BEGIN);
|
---|
1498 | break;
|
---|
1499 | case ANCHOR_WORD_END:
|
---|
1500 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
|
---|
1501 | else r = add_opcode(reg, OP_WORD_END);
|
---|
1502 | break;
|
---|
1503 | #endif
|
---|
1504 | case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
|
---|
1505 |
|
---|
1506 | case ANCHOR_PREC_READ:
|
---|
1507 | r = add_opcode(reg, OP_PUSH_POS);
|
---|
1508 | if (r) return r;
|
---|
1509 | r = compile_tree(node->target, reg);
|
---|
1510 | if (r) return r;
|
---|
1511 | r = add_opcode(reg, OP_POP_POS);
|
---|
1512 | break;
|
---|
1513 |
|
---|
1514 | case ANCHOR_PREC_READ_NOT:
|
---|
1515 | len = compile_length_tree(node->target, reg);
|
---|
1516 | if (len < 0) return len;
|
---|
1517 | r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
|
---|
1518 | if (r) return r;
|
---|
1519 | r = compile_tree(node->target, reg);
|
---|
1520 | if (r) return r;
|
---|
1521 | r = add_opcode(reg, OP_FAIL_POS);
|
---|
1522 | break;
|
---|
1523 |
|
---|
1524 | case ANCHOR_LOOK_BEHIND:
|
---|
1525 | {
|
---|
1526 | int n;
|
---|
1527 | r = add_opcode(reg, OP_LOOK_BEHIND);
|
---|
1528 | if (r) return r;
|
---|
1529 | if (node->char_len < 0) {
|
---|
1530 | r = get_char_length_tree(node->target, reg, &n);
|
---|
1531 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
1532 | }
|
---|
1533 | else
|
---|
1534 | n = node->char_len;
|
---|
1535 | r = add_length(reg, n);
|
---|
1536 | if (r) return r;
|
---|
1537 | r = compile_tree(node->target, reg);
|
---|
1538 | }
|
---|
1539 | break;
|
---|
1540 |
|
---|
1541 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
1542 | {
|
---|
1543 | int n;
|
---|
1544 | len = compile_length_tree(node->target, reg);
|
---|
1545 | r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
|
---|
1546 | len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
|
---|
1547 | if (r) return r;
|
---|
1548 | if (node->char_len < 0) {
|
---|
1549 | r = get_char_length_tree(node->target, reg, &n);
|
---|
1550 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
1551 | }
|
---|
1552 | else
|
---|
1553 | n = node->char_len;
|
---|
1554 | r = add_length(reg, n);
|
---|
1555 | if (r) return r;
|
---|
1556 | r = compile_tree(node->target, reg);
|
---|
1557 | if (r) return r;
|
---|
1558 | r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
|
---|
1559 | }
|
---|
1560 | break;
|
---|
1561 |
|
---|
1562 | default:
|
---|
1563 | return ONIGERR_TYPE_BUG;
|
---|
1564 | break;
|
---|
1565 | }
|
---|
1566 |
|
---|
1567 | return r;
|
---|
1568 | }
|
---|
1569 |
|
---|
1570 | static int
|
---|
1571 | compile_length_tree(Node* node, regex_t* reg)
|
---|
1572 | {
|
---|
1573 | int len, type, r;
|
---|
1574 |
|
---|
1575 | type = NTYPE(node);
|
---|
1576 | switch (type) {
|
---|
1577 | case NT_LIST:
|
---|
1578 | len = 0;
|
---|
1579 | do {
|
---|
1580 | r = compile_length_tree(NCAR(node), reg);
|
---|
1581 | if (r < 0) return r;
|
---|
1582 | len += r;
|
---|
1583 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
1584 | r = len;
|
---|
1585 | break;
|
---|
1586 |
|
---|
1587 | case NT_ALT:
|
---|
1588 | {
|
---|
1589 | int n = 0;
|
---|
1590 | len = 0;
|
---|
1591 | do {
|
---|
1592 | r = compile_length_tree(NCAR(node), reg);
|
---|
1593 | if (r < 0) return r;
|
---|
1594 | len += r;
|
---|
1595 | n++;
|
---|
1596 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
1597 | r = len;
|
---|
1598 | r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
|
---|
1599 | }
|
---|
1600 | break;
|
---|
1601 |
|
---|
1602 | case NT_STR:
|
---|
1603 | if (NSTRING_IS_RAW(node))
|
---|
1604 | r = compile_length_string_raw_node(NSTR(node), reg);
|
---|
1605 | else
|
---|
1606 | r = compile_length_string_node(node, reg);
|
---|
1607 | break;
|
---|
1608 |
|
---|
1609 | case NT_CCLASS:
|
---|
1610 | r = compile_length_cclass_node(NCCLASS(node), reg);
|
---|
1611 | break;
|
---|
1612 |
|
---|
1613 | case NT_CTYPE:
|
---|
1614 | case NT_CANY:
|
---|
1615 | r = SIZE_OPCODE;
|
---|
1616 | break;
|
---|
1617 |
|
---|
1618 | case NT_BREF:
|
---|
1619 | {
|
---|
1620 | BRefNode* br = NBREF(node);
|
---|
1621 |
|
---|
1622 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
1623 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
1624 | r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
|
---|
1625 | SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
1626 | }
|
---|
1627 | else
|
---|
1628 | #endif
|
---|
1629 | if (br->back_num == 1) {
|
---|
1630 | r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
|
---|
1631 | ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
|
---|
1632 | }
|
---|
1633 | else {
|
---|
1634 | r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
1635 | }
|
---|
1636 | }
|
---|
1637 | break;
|
---|
1638 |
|
---|
1639 | #ifdef USE_SUBEXP_CALL
|
---|
1640 | case NT_CALL:
|
---|
1641 | r = SIZE_OP_CALL;
|
---|
1642 | break;
|
---|
1643 | #endif
|
---|
1644 |
|
---|
1645 | case NT_QTFR:
|
---|
1646 | r = compile_length_quantifier_node(NQTFR(node), reg);
|
---|
1647 | break;
|
---|
1648 |
|
---|
1649 | case NT_ENCLOSE:
|
---|
1650 | r = compile_length_enclose_node(NENCLOSE(node), reg);
|
---|
1651 | break;
|
---|
1652 |
|
---|
1653 | case NT_ANCHOR:
|
---|
1654 | r = compile_length_anchor_node(NANCHOR(node), reg);
|
---|
1655 | break;
|
---|
1656 |
|
---|
1657 | default:
|
---|
1658 | return ONIGERR_TYPE_BUG;
|
---|
1659 | break;
|
---|
1660 | }
|
---|
1661 |
|
---|
1662 | return r;
|
---|
1663 | }
|
---|
1664 |
|
---|
1665 | static int
|
---|
1666 | compile_tree(Node* node, regex_t* reg)
|
---|
1667 | {
|
---|
1668 | int n, type, len, pos, r = 0;
|
---|
1669 |
|
---|
1670 | type = NTYPE(node);
|
---|
1671 | switch (type) {
|
---|
1672 | case NT_LIST:
|
---|
1673 | do {
|
---|
1674 | r = compile_tree(NCAR(node), reg);
|
---|
1675 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1676 | break;
|
---|
1677 |
|
---|
1678 | case NT_ALT:
|
---|
1679 | {
|
---|
1680 | Node* x = node;
|
---|
1681 | len = 0;
|
---|
1682 | do {
|
---|
1683 | len += compile_length_tree(NCAR(x), reg);
|
---|
1684 | if (NCDR(x) != NULL) {
|
---|
1685 | len += SIZE_OP_PUSH + SIZE_OP_JUMP;
|
---|
1686 | }
|
---|
1687 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
1688 | pos = reg->used + len; /* goal position */
|
---|
1689 |
|
---|
1690 | do {
|
---|
1691 | len = compile_length_tree(NCAR(node), reg);
|
---|
1692 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
1693 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
|
---|
1694 | if (r) break;
|
---|
1695 | }
|
---|
1696 | r = compile_tree(NCAR(node), reg);
|
---|
1697 | if (r) break;
|
---|
1698 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
1699 | len = pos - (reg->used + SIZE_OP_JUMP);
|
---|
1700 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
1701 | if (r) break;
|
---|
1702 | }
|
---|
1703 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
1704 | }
|
---|
1705 | break;
|
---|
1706 |
|
---|
1707 | case NT_STR:
|
---|
1708 | if (NSTRING_IS_RAW(node))
|
---|
1709 | r = compile_string_raw_node(NSTR(node), reg);
|
---|
1710 | else
|
---|
1711 | r = compile_string_node(node, reg);
|
---|
1712 | break;
|
---|
1713 |
|
---|
1714 | case NT_CCLASS:
|
---|
1715 | r = compile_cclass_node(NCCLASS(node), reg);
|
---|
1716 | break;
|
---|
1717 |
|
---|
1718 | case NT_CTYPE:
|
---|
1719 | {
|
---|
1720 | int op;
|
---|
1721 |
|
---|
1722 | switch (NCTYPE(node)->ctype) {
|
---|
1723 | case ONIGENC_CTYPE_WORD:
|
---|
1724 | if (NCTYPE(node)->ascii_range != 0) {
|
---|
1725 | if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
|
---|
1726 | else op = OP_ASCII_WORD;
|
---|
1727 | }
|
---|
1728 | else {
|
---|
1729 | if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
|
---|
1730 | else op = OP_WORD;
|
---|
1731 | }
|
---|
1732 | break;
|
---|
1733 | default:
|
---|
1734 | return ONIGERR_TYPE_BUG;
|
---|
1735 | break;
|
---|
1736 | }
|
---|
1737 | r = add_opcode(reg, op);
|
---|
1738 | }
|
---|
1739 | break;
|
---|
1740 |
|
---|
1741 | case NT_CANY:
|
---|
1742 | if (IS_MULTILINE(reg->options))
|
---|
1743 | r = add_opcode(reg, OP_ANYCHAR_ML);
|
---|
1744 | else
|
---|
1745 | r = add_opcode(reg, OP_ANYCHAR);
|
---|
1746 | break;
|
---|
1747 |
|
---|
1748 | case NT_BREF:
|
---|
1749 | {
|
---|
1750 | BRefNode* br = NBREF(node);
|
---|
1751 |
|
---|
1752 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
1753 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
1754 | r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
|
---|
1755 | if (r) return r;
|
---|
1756 | r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
|
---|
1757 | if (r) return r;
|
---|
1758 | r = add_length(reg, br->nest_level);
|
---|
1759 | if (r) return r;
|
---|
1760 |
|
---|
1761 | goto add_bacref_mems;
|
---|
1762 | }
|
---|
1763 | else
|
---|
1764 | #endif
|
---|
1765 | if (br->back_num == 1) {
|
---|
1766 | n = br->back_static[0];
|
---|
1767 | if (IS_IGNORECASE(reg->options)) {
|
---|
1768 | r = add_opcode(reg, OP_BACKREFN_IC);
|
---|
1769 | if (r) return r;
|
---|
1770 | r = add_mem_num(reg, n);
|
---|
1771 | }
|
---|
1772 | else {
|
---|
1773 | switch (n) {
|
---|
1774 | case 1: r = add_opcode(reg, OP_BACKREF1); break;
|
---|
1775 | case 2: r = add_opcode(reg, OP_BACKREF2); break;
|
---|
1776 | default:
|
---|
1777 | r = add_opcode(reg, OP_BACKREFN);
|
---|
1778 | if (r) return r;
|
---|
1779 | r = add_mem_num(reg, n);
|
---|
1780 | break;
|
---|
1781 | }
|
---|
1782 | }
|
---|
1783 | }
|
---|
1784 | else {
|
---|
1785 | int i;
|
---|
1786 | int* p;
|
---|
1787 |
|
---|
1788 | if (IS_IGNORECASE(reg->options)) {
|
---|
1789 | r = add_opcode(reg, OP_BACKREF_MULTI_IC);
|
---|
1790 | }
|
---|
1791 | else {
|
---|
1792 | r = add_opcode(reg, OP_BACKREF_MULTI);
|
---|
1793 | }
|
---|
1794 | if (r) return r;
|
---|
1795 |
|
---|
1796 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
1797 | add_bacref_mems:
|
---|
1798 | #endif
|
---|
1799 | r = add_length(reg, br->back_num);
|
---|
1800 | if (r) return r;
|
---|
1801 | p = BACKREFS_P(br);
|
---|
1802 | for (i = br->back_num - 1; i >= 0; i--) {
|
---|
1803 | r = add_mem_num(reg, p[i]);
|
---|
1804 | if (r) return r;
|
---|
1805 | }
|
---|
1806 | }
|
---|
1807 | }
|
---|
1808 | break;
|
---|
1809 |
|
---|
1810 | #ifdef USE_SUBEXP_CALL
|
---|
1811 | case NT_CALL:
|
---|
1812 | r = compile_call(NCALL(node), reg);
|
---|
1813 | break;
|
---|
1814 | #endif
|
---|
1815 |
|
---|
1816 | case NT_QTFR:
|
---|
1817 | r = compile_quantifier_node(NQTFR(node), reg);
|
---|
1818 | break;
|
---|
1819 |
|
---|
1820 | case NT_ENCLOSE:
|
---|
1821 | r = compile_enclose_node(NENCLOSE(node), reg);
|
---|
1822 | break;
|
---|
1823 |
|
---|
1824 | case NT_ANCHOR:
|
---|
1825 | r = compile_anchor_node(NANCHOR(node), reg);
|
---|
1826 | break;
|
---|
1827 |
|
---|
1828 | default:
|
---|
1829 | #ifdef ONIG_DEBUG
|
---|
1830 | fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
|
---|
1831 | #endif
|
---|
1832 | break;
|
---|
1833 | }
|
---|
1834 |
|
---|
1835 | return r;
|
---|
1836 | }
|
---|
1837 |
|
---|
1838 | #ifdef USE_NAMED_GROUP
|
---|
1839 |
|
---|
1840 | static int
|
---|
1841 | noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
|
---|
1842 | {
|
---|
1843 | int r = 0;
|
---|
1844 | Node* node = *plink;
|
---|
1845 |
|
---|
1846 | switch (NTYPE(node)) {
|
---|
1847 | case NT_LIST:
|
---|
1848 | case NT_ALT:
|
---|
1849 | do {
|
---|
1850 | r = noname_disable_map(&(NCAR(node)), map, counter);
|
---|
1851 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1852 | break;
|
---|
1853 |
|
---|
1854 | case NT_QTFR:
|
---|
1855 | {
|
---|
1856 | Node** ptarget = &(NQTFR(node)->target);
|
---|
1857 | Node* old = *ptarget;
|
---|
1858 | r = noname_disable_map(ptarget, map, counter);
|
---|
1859 | if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
|
---|
1860 | onig_reduce_nested_quantifier(node, *ptarget);
|
---|
1861 | }
|
---|
1862 | }
|
---|
1863 | break;
|
---|
1864 |
|
---|
1865 | case NT_ENCLOSE:
|
---|
1866 | {
|
---|
1867 | EncloseNode* en = NENCLOSE(node);
|
---|
1868 | if (en->type == ENCLOSE_MEMORY) {
|
---|
1869 | if (IS_ENCLOSE_NAMED_GROUP(en)) {
|
---|
1870 | (*counter)++;
|
---|
1871 | map[en->regnum].new_val = *counter;
|
---|
1872 | en->regnum = *counter;
|
---|
1873 | }
|
---|
1874 | else if (en->regnum != 0) {
|
---|
1875 | *plink = en->target;
|
---|
1876 | en->target = NULL_NODE;
|
---|
1877 | onig_node_free(node);
|
---|
1878 | r = noname_disable_map(plink, map, counter);
|
---|
1879 | break;
|
---|
1880 | }
|
---|
1881 | }
|
---|
1882 | r = noname_disable_map(&(en->target), map, counter);
|
---|
1883 | }
|
---|
1884 | break;
|
---|
1885 |
|
---|
1886 | case NT_ANCHOR:
|
---|
1887 | {
|
---|
1888 | AnchorNode* an = NANCHOR(node);
|
---|
1889 | switch (an->type) {
|
---|
1890 | case ANCHOR_PREC_READ:
|
---|
1891 | case ANCHOR_PREC_READ_NOT:
|
---|
1892 | case ANCHOR_LOOK_BEHIND:
|
---|
1893 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
1894 | r = noname_disable_map(&(an->target), map, counter);
|
---|
1895 | break;
|
---|
1896 | }
|
---|
1897 | }
|
---|
1898 | break;
|
---|
1899 |
|
---|
1900 | default:
|
---|
1901 | break;
|
---|
1902 | }
|
---|
1903 |
|
---|
1904 | return r;
|
---|
1905 | }
|
---|
1906 |
|
---|
1907 | static int
|
---|
1908 | renumber_node_backref(Node* node, GroupNumRemap* map)
|
---|
1909 | {
|
---|
1910 | int i, pos, n, old_num;
|
---|
1911 | int *backs;
|
---|
1912 | BRefNode* bn = NBREF(node);
|
---|
1913 |
|
---|
1914 | if (! IS_BACKREF_NAME_REF(bn))
|
---|
1915 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
1916 |
|
---|
1917 | old_num = bn->back_num;
|
---|
1918 | if (IS_NULL(bn->back_dynamic))
|
---|
1919 | backs = bn->back_static;
|
---|
1920 | else
|
---|
1921 | backs = bn->back_dynamic;
|
---|
1922 |
|
---|
1923 | for (i = 0, pos = 0; i < old_num; i++) {
|
---|
1924 | n = map[backs[i]].new_val;
|
---|
1925 | if (n > 0) {
|
---|
1926 | backs[pos] = n;
|
---|
1927 | pos++;
|
---|
1928 | }
|
---|
1929 | }
|
---|
1930 |
|
---|
1931 | bn->back_num = pos;
|
---|
1932 | return 0;
|
---|
1933 | }
|
---|
1934 |
|
---|
1935 | static int
|
---|
1936 | renumber_by_map(Node* node, GroupNumRemap* map)
|
---|
1937 | {
|
---|
1938 | int r = 0;
|
---|
1939 |
|
---|
1940 | switch (NTYPE(node)) {
|
---|
1941 | case NT_LIST:
|
---|
1942 | case NT_ALT:
|
---|
1943 | do {
|
---|
1944 | r = renumber_by_map(NCAR(node), map);
|
---|
1945 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1946 | break;
|
---|
1947 | case NT_QTFR:
|
---|
1948 | r = renumber_by_map(NQTFR(node)->target, map);
|
---|
1949 | break;
|
---|
1950 | case NT_ENCLOSE:
|
---|
1951 | {
|
---|
1952 | EncloseNode* en = NENCLOSE(node);
|
---|
1953 | if (en->type == ENCLOSE_CONDITION)
|
---|
1954 | en->regnum = map[en->regnum].new_val;
|
---|
1955 | r = renumber_by_map(en->target, map);
|
---|
1956 | }
|
---|
1957 | break;
|
---|
1958 |
|
---|
1959 | case NT_BREF:
|
---|
1960 | r = renumber_node_backref(node, map);
|
---|
1961 | break;
|
---|
1962 |
|
---|
1963 | case NT_ANCHOR:
|
---|
1964 | {
|
---|
1965 | AnchorNode* an = NANCHOR(node);
|
---|
1966 | switch (an->type) {
|
---|
1967 | case ANCHOR_PREC_READ:
|
---|
1968 | case ANCHOR_PREC_READ_NOT:
|
---|
1969 | case ANCHOR_LOOK_BEHIND:
|
---|
1970 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
1971 | r = renumber_by_map(an->target, map);
|
---|
1972 | break;
|
---|
1973 | }
|
---|
1974 | }
|
---|
1975 | break;
|
---|
1976 |
|
---|
1977 | default:
|
---|
1978 | break;
|
---|
1979 | }
|
---|
1980 |
|
---|
1981 | return r;
|
---|
1982 | }
|
---|
1983 |
|
---|
1984 | static int
|
---|
1985 | numbered_ref_check(Node* node)
|
---|
1986 | {
|
---|
1987 | int r = 0;
|
---|
1988 |
|
---|
1989 | switch (NTYPE(node)) {
|
---|
1990 | case NT_LIST:
|
---|
1991 | case NT_ALT:
|
---|
1992 | do {
|
---|
1993 | r = numbered_ref_check(NCAR(node));
|
---|
1994 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
1995 | break;
|
---|
1996 | case NT_QTFR:
|
---|
1997 | r = numbered_ref_check(NQTFR(node)->target);
|
---|
1998 | break;
|
---|
1999 | case NT_ENCLOSE:
|
---|
2000 | r = numbered_ref_check(NENCLOSE(node)->target);
|
---|
2001 | break;
|
---|
2002 |
|
---|
2003 | case NT_BREF:
|
---|
2004 | if (! IS_BACKREF_NAME_REF(NBREF(node)))
|
---|
2005 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
2006 | break;
|
---|
2007 |
|
---|
2008 | default:
|
---|
2009 | break;
|
---|
2010 | }
|
---|
2011 |
|
---|
2012 | return r;
|
---|
2013 | }
|
---|
2014 |
|
---|
2015 | static int
|
---|
2016 | disable_noname_group_capture_(Node** root, regex_t* reg, ScanEnv* env, GroupNumRemap* map)
|
---|
2017 | {
|
---|
2018 | int r, i, pos, counter;
|
---|
2019 | BitStatusType loc;
|
---|
2020 |
|
---|
2021 | CHECK_NULL_RETURN_MEMERR(map);
|
---|
2022 | for (i = 1; i <= env->num_mem; i++) {
|
---|
2023 | map[i].new_val = 0;
|
---|
2024 | }
|
---|
2025 | counter = 0;
|
---|
2026 | r = noname_disable_map(root, map, &counter);
|
---|
2027 | if (r != 0) return r;
|
---|
2028 |
|
---|
2029 | r = renumber_by_map(*root, map);
|
---|
2030 | if (r != 0) return r;
|
---|
2031 |
|
---|
2032 | for (i = 1, pos = 1; i <= env->num_mem; i++) {
|
---|
2033 | if (map[i].new_val > 0) {
|
---|
2034 | SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
|
---|
2035 | pos++;
|
---|
2036 | }
|
---|
2037 | }
|
---|
2038 |
|
---|
2039 | loc = env->capture_history;
|
---|
2040 | BIT_STATUS_CLEAR(env->capture_history);
|
---|
2041 | for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
|
---|
2042 | if (BIT_STATUS_AT(loc, i)) {
|
---|
2043 | BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
|
---|
2044 | }
|
---|
2045 | }
|
---|
2046 |
|
---|
2047 | env->num_mem = env->num_named;
|
---|
2048 | reg->num_mem = env->num_named;
|
---|
2049 |
|
---|
2050 | return onig_renumber_name_table(reg, map);
|
---|
2051 | }
|
---|
2052 |
|
---|
2053 | static int
|
---|
2054 | disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
|
---|
2055 | {
|
---|
2056 | GroupNumRemap* map = (GroupNumRemap*)xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
|
---|
2057 | int ret = disable_noname_group_capture_(root, reg, env, map);
|
---|
2058 | xfree(map);
|
---|
2059 | return ret;
|
---|
2060 | }
|
---|
2061 | #endif /* USE_NAMED_GROUP */
|
---|
2062 |
|
---|
2063 | #ifdef USE_SUBEXP_CALL
|
---|
2064 | static int
|
---|
2065 | unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
|
---|
2066 | {
|
---|
2067 | int i, offset;
|
---|
2068 | EncloseNode* en;
|
---|
2069 | AbsAddrType addr;
|
---|
2070 |
|
---|
2071 | for (i = 0; i < uslist->num; i++) {
|
---|
2072 | en = NENCLOSE(uslist->us[i].target);
|
---|
2073 | if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
|
---|
2074 | addr = en->call_addr;
|
---|
2075 | offset = uslist->us[i].offset;
|
---|
2076 |
|
---|
2077 | BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
|
---|
2078 | }
|
---|
2079 | return 0;
|
---|
2080 | }
|
---|
2081 | #endif
|
---|
2082 |
|
---|
2083 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
2084 | static int
|
---|
2085 | quantifiers_memory_node_info(Node* node)
|
---|
2086 | {
|
---|
2087 | int r = 0;
|
---|
2088 |
|
---|
2089 | switch (NTYPE(node)) {
|
---|
2090 | case NT_LIST:
|
---|
2091 | case NT_ALT:
|
---|
2092 | {
|
---|
2093 | int v;
|
---|
2094 | do {
|
---|
2095 | v = quantifiers_memory_node_info(NCAR(node));
|
---|
2096 | if (v > r) r = v;
|
---|
2097 | } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2098 | }
|
---|
2099 | break;
|
---|
2100 |
|
---|
2101 | #ifdef USE_SUBEXP_CALL
|
---|
2102 | case NT_CALL:
|
---|
2103 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
2104 | return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
|
---|
2105 | }
|
---|
2106 | else
|
---|
2107 | r = quantifiers_memory_node_info(NCALL(node)->target);
|
---|
2108 | break;
|
---|
2109 | #endif
|
---|
2110 |
|
---|
2111 | case NT_QTFR:
|
---|
2112 | {
|
---|
2113 | QtfrNode* qn = NQTFR(node);
|
---|
2114 | if (qn->upper != 0) {
|
---|
2115 | r = quantifiers_memory_node_info(qn->target);
|
---|
2116 | }
|
---|
2117 | }
|
---|
2118 | break;
|
---|
2119 |
|
---|
2120 | case NT_ENCLOSE:
|
---|
2121 | {
|
---|
2122 | EncloseNode* en = NENCLOSE(node);
|
---|
2123 | switch (en->type) {
|
---|
2124 | case ENCLOSE_MEMORY:
|
---|
2125 | return NQ_TARGET_IS_EMPTY_MEM;
|
---|
2126 | break;
|
---|
2127 |
|
---|
2128 | case ENCLOSE_OPTION:
|
---|
2129 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2130 | case ENCLOSE_CONDITION:
|
---|
2131 | r = quantifiers_memory_node_info(en->target);
|
---|
2132 | break;
|
---|
2133 | default:
|
---|
2134 | break;
|
---|
2135 | }
|
---|
2136 | }
|
---|
2137 | break;
|
---|
2138 |
|
---|
2139 | case NT_BREF:
|
---|
2140 | case NT_STR:
|
---|
2141 | case NT_CTYPE:
|
---|
2142 | case NT_CCLASS:
|
---|
2143 | case NT_CANY:
|
---|
2144 | case NT_ANCHOR:
|
---|
2145 | default:
|
---|
2146 | break;
|
---|
2147 | }
|
---|
2148 |
|
---|
2149 | return r;
|
---|
2150 | }
|
---|
2151 | #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
|
---|
2152 |
|
---|
2153 | static int
|
---|
2154 | get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
|
---|
2155 | {
|
---|
2156 | OnigDistance tmin;
|
---|
2157 | int r = 0;
|
---|
2158 |
|
---|
2159 | *min = 0;
|
---|
2160 | switch (NTYPE(node)) {
|
---|
2161 | case NT_BREF:
|
---|
2162 | {
|
---|
2163 | int i;
|
---|
2164 | int* backs;
|
---|
2165 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
2166 | BRefNode* br = NBREF(node);
|
---|
2167 | if (br->state & NST_RECURSION) break;
|
---|
2168 |
|
---|
2169 | backs = BACKREFS_P(br);
|
---|
2170 | if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
2171 | r = get_min_match_length(nodes[backs[0]], min, env);
|
---|
2172 | if (r != 0) break;
|
---|
2173 | for (i = 1; i < br->back_num; i++) {
|
---|
2174 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
2175 | r = get_min_match_length(nodes[backs[i]], &tmin, env);
|
---|
2176 | if (r != 0) break;
|
---|
2177 | if (*min > tmin) *min = tmin;
|
---|
2178 | }
|
---|
2179 | }
|
---|
2180 | break;
|
---|
2181 |
|
---|
2182 | #ifdef USE_SUBEXP_CALL
|
---|
2183 | case NT_CALL:
|
---|
2184 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
2185 | EncloseNode* en = NENCLOSE(NCALL(node)->target);
|
---|
2186 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
2187 | *min = en->min_len;
|
---|
2188 | }
|
---|
2189 | else
|
---|
2190 | r = get_min_match_length(NCALL(node)->target, min, env);
|
---|
2191 | break;
|
---|
2192 | #endif
|
---|
2193 |
|
---|
2194 | case NT_LIST:
|
---|
2195 | do {
|
---|
2196 | r = get_min_match_length(NCAR(node), &tmin, env);
|
---|
2197 | if (r == 0) *min += tmin;
|
---|
2198 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2199 | break;
|
---|
2200 |
|
---|
2201 | case NT_ALT:
|
---|
2202 | {
|
---|
2203 | Node *x, *y;
|
---|
2204 | y = node;
|
---|
2205 | do {
|
---|
2206 | x = NCAR(y);
|
---|
2207 | r = get_min_match_length(x, &tmin, env);
|
---|
2208 | if (r != 0) break;
|
---|
2209 | if (y == node) *min = tmin;
|
---|
2210 | else if (*min > tmin) *min = tmin;
|
---|
2211 | } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
|
---|
2212 | }
|
---|
2213 | break;
|
---|
2214 |
|
---|
2215 | case NT_STR:
|
---|
2216 | {
|
---|
2217 | StrNode* sn = NSTR(node);
|
---|
2218 | *min = sn->end - sn->s;
|
---|
2219 | }
|
---|
2220 | break;
|
---|
2221 |
|
---|
2222 | case NT_CTYPE:
|
---|
2223 | *min = 1;
|
---|
2224 | break;
|
---|
2225 |
|
---|
2226 | case NT_CCLASS:
|
---|
2227 | case NT_CANY:
|
---|
2228 | *min = 1;
|
---|
2229 | break;
|
---|
2230 |
|
---|
2231 | case NT_QTFR:
|
---|
2232 | {
|
---|
2233 | QtfrNode* qn = NQTFR(node);
|
---|
2234 |
|
---|
2235 | if (qn->lower > 0) {
|
---|
2236 | r = get_min_match_length(qn->target, min, env);
|
---|
2237 | if (r == 0)
|
---|
2238 | *min = distance_multiply(*min, qn->lower);
|
---|
2239 | }
|
---|
2240 | }
|
---|
2241 | break;
|
---|
2242 |
|
---|
2243 | case NT_ENCLOSE:
|
---|
2244 | {
|
---|
2245 | EncloseNode* en = NENCLOSE(node);
|
---|
2246 | switch (en->type) {
|
---|
2247 | case ENCLOSE_MEMORY:
|
---|
2248 | #ifdef USE_SUBEXP_CALL
|
---|
2249 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
2250 | *min = en->min_len;
|
---|
2251 | else {
|
---|
2252 | r = get_min_match_length(en->target, min, env);
|
---|
2253 | if (r == 0) {
|
---|
2254 | en->min_len = *min;
|
---|
2255 | SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
|
---|
2256 | }
|
---|
2257 | }
|
---|
2258 | break;
|
---|
2259 | #endif
|
---|
2260 | case ENCLOSE_OPTION:
|
---|
2261 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2262 | case ENCLOSE_CONDITION:
|
---|
2263 | r = get_min_match_length(en->target, min, env);
|
---|
2264 | break;
|
---|
2265 | }
|
---|
2266 | }
|
---|
2267 | break;
|
---|
2268 |
|
---|
2269 | case NT_ANCHOR:
|
---|
2270 | default:
|
---|
2271 | break;
|
---|
2272 | }
|
---|
2273 |
|
---|
2274 | return r;
|
---|
2275 | }
|
---|
2276 |
|
---|
2277 | static int
|
---|
2278 | get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
|
---|
2279 | {
|
---|
2280 | OnigDistance tmax;
|
---|
2281 | int r = 0;
|
---|
2282 |
|
---|
2283 | *max = 0;
|
---|
2284 | switch (NTYPE(node)) {
|
---|
2285 | case NT_LIST:
|
---|
2286 | do {
|
---|
2287 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
2288 | if (r == 0)
|
---|
2289 | *max = distance_add(*max, tmax);
|
---|
2290 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2291 | break;
|
---|
2292 |
|
---|
2293 | case NT_ALT:
|
---|
2294 | do {
|
---|
2295 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
2296 | if (r == 0 && *max < tmax) *max = tmax;
|
---|
2297 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2298 | break;
|
---|
2299 |
|
---|
2300 | case NT_STR:
|
---|
2301 | {
|
---|
2302 | StrNode* sn = NSTR(node);
|
---|
2303 | *max = sn->end - sn->s;
|
---|
2304 | }
|
---|
2305 | break;
|
---|
2306 |
|
---|
2307 | case NT_CTYPE:
|
---|
2308 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
2309 | break;
|
---|
2310 |
|
---|
2311 | case NT_CCLASS:
|
---|
2312 | case NT_CANY:
|
---|
2313 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
2314 | break;
|
---|
2315 |
|
---|
2316 | case NT_BREF:
|
---|
2317 | {
|
---|
2318 | int i;
|
---|
2319 | int* backs;
|
---|
2320 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
2321 | BRefNode* br = NBREF(node);
|
---|
2322 | if (br->state & NST_RECURSION) {
|
---|
2323 | *max = ONIG_INFINITE_DISTANCE;
|
---|
2324 | break;
|
---|
2325 | }
|
---|
2326 | backs = BACKREFS_P(br);
|
---|
2327 | for (i = 0; i < br->back_num; i++) {
|
---|
2328 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
2329 | r = get_max_match_length(nodes[backs[i]], &tmax, env);
|
---|
2330 | if (r != 0) break;
|
---|
2331 | if (*max < tmax) *max = tmax;
|
---|
2332 | }
|
---|
2333 | }
|
---|
2334 | break;
|
---|
2335 |
|
---|
2336 | #ifdef USE_SUBEXP_CALL
|
---|
2337 | case NT_CALL:
|
---|
2338 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
2339 | r = get_max_match_length(NCALL(node)->target, max, env);
|
---|
2340 | else
|
---|
2341 | *max = ONIG_INFINITE_DISTANCE;
|
---|
2342 | break;
|
---|
2343 | #endif
|
---|
2344 |
|
---|
2345 | case NT_QTFR:
|
---|
2346 | {
|
---|
2347 | QtfrNode* qn = NQTFR(node);
|
---|
2348 |
|
---|
2349 | if (qn->upper != 0) {
|
---|
2350 | r = get_max_match_length(qn->target, max, env);
|
---|
2351 | if (r == 0 && *max != 0) {
|
---|
2352 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
2353 | *max = distance_multiply(*max, qn->upper);
|
---|
2354 | else
|
---|
2355 | *max = ONIG_INFINITE_DISTANCE;
|
---|
2356 | }
|
---|
2357 | }
|
---|
2358 | }
|
---|
2359 | break;
|
---|
2360 |
|
---|
2361 | case NT_ENCLOSE:
|
---|
2362 | {
|
---|
2363 | EncloseNode* en = NENCLOSE(node);
|
---|
2364 | switch (en->type) {
|
---|
2365 | case ENCLOSE_MEMORY:
|
---|
2366 | #ifdef USE_SUBEXP_CALL
|
---|
2367 | if (IS_ENCLOSE_MAX_FIXED(en))
|
---|
2368 | *max = en->max_len;
|
---|
2369 | else {
|
---|
2370 | r = get_max_match_length(en->target, max, env);
|
---|
2371 | if (r == 0) {
|
---|
2372 | en->max_len = *max;
|
---|
2373 | SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
|
---|
2374 | }
|
---|
2375 | }
|
---|
2376 | break;
|
---|
2377 | #endif
|
---|
2378 | case ENCLOSE_OPTION:
|
---|
2379 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2380 | case ENCLOSE_CONDITION:
|
---|
2381 | r = get_max_match_length(en->target, max, env);
|
---|
2382 | break;
|
---|
2383 | }
|
---|
2384 | }
|
---|
2385 | break;
|
---|
2386 |
|
---|
2387 | case NT_ANCHOR:
|
---|
2388 | default:
|
---|
2389 | break;
|
---|
2390 | }
|
---|
2391 |
|
---|
2392 | return r;
|
---|
2393 | }
|
---|
2394 |
|
---|
2395 | #define GET_CHAR_LEN_VARLEN -1
|
---|
2396 | #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
|
---|
2397 |
|
---|
2398 | /* fixed size pattern node only */
|
---|
2399 | static int
|
---|
2400 | get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
|
---|
2401 | {
|
---|
2402 | int tlen;
|
---|
2403 | int r = 0;
|
---|
2404 |
|
---|
2405 | level++;
|
---|
2406 | *len = 0;
|
---|
2407 | switch (NTYPE(node)) {
|
---|
2408 | case NT_LIST:
|
---|
2409 | do {
|
---|
2410 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
2411 | if (r == 0)
|
---|
2412 | *len = (int )distance_add(*len, tlen);
|
---|
2413 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2414 | break;
|
---|
2415 |
|
---|
2416 | case NT_ALT:
|
---|
2417 | {
|
---|
2418 | int tlen2;
|
---|
2419 | int varlen = 0;
|
---|
2420 |
|
---|
2421 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
2422 | while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
|
---|
2423 | r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
|
---|
2424 | if (r == 0) {
|
---|
2425 | if (tlen != tlen2)
|
---|
2426 | varlen = 1;
|
---|
2427 | }
|
---|
2428 | }
|
---|
2429 | if (r == 0) {
|
---|
2430 | if (varlen != 0) {
|
---|
2431 | if (level == 1)
|
---|
2432 | r = GET_CHAR_LEN_TOP_ALT_VARLEN;
|
---|
2433 | else
|
---|
2434 | r = GET_CHAR_LEN_VARLEN;
|
---|
2435 | }
|
---|
2436 | else
|
---|
2437 | *len = tlen;
|
---|
2438 | }
|
---|
2439 | }
|
---|
2440 | break;
|
---|
2441 |
|
---|
2442 | case NT_STR:
|
---|
2443 | {
|
---|
2444 | StrNode* sn = NSTR(node);
|
---|
2445 | UChar *s = sn->s;
|
---|
2446 | while (s < sn->end) {
|
---|
2447 | s += enclen(reg->enc, s);
|
---|
2448 | (*len)++;
|
---|
2449 | }
|
---|
2450 | }
|
---|
2451 | break;
|
---|
2452 |
|
---|
2453 | case NT_QTFR:
|
---|
2454 | {
|
---|
2455 | QtfrNode* qn = NQTFR(node);
|
---|
2456 | if (qn->lower == qn->upper) {
|
---|
2457 | r = get_char_length_tree1(qn->target, reg, &tlen, level);
|
---|
2458 | if (r == 0)
|
---|
2459 | *len = (int )distance_multiply(tlen, qn->lower);
|
---|
2460 | }
|
---|
2461 | else
|
---|
2462 | r = GET_CHAR_LEN_VARLEN;
|
---|
2463 | }
|
---|
2464 | break;
|
---|
2465 |
|
---|
2466 | #ifdef USE_SUBEXP_CALL
|
---|
2467 | case NT_CALL:
|
---|
2468 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
2469 | r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
|
---|
2470 | else
|
---|
2471 | r = GET_CHAR_LEN_VARLEN;
|
---|
2472 | break;
|
---|
2473 | #endif
|
---|
2474 |
|
---|
2475 | case NT_CTYPE:
|
---|
2476 | *len = 1;
|
---|
2477 | break;
|
---|
2478 |
|
---|
2479 | case NT_CCLASS:
|
---|
2480 | case NT_CANY:
|
---|
2481 | *len = 1;
|
---|
2482 | break;
|
---|
2483 |
|
---|
2484 | case NT_ENCLOSE:
|
---|
2485 | {
|
---|
2486 | EncloseNode* en = NENCLOSE(node);
|
---|
2487 | switch (en->type) {
|
---|
2488 | case ENCLOSE_MEMORY:
|
---|
2489 | #ifdef USE_SUBEXP_CALL
|
---|
2490 | if (IS_ENCLOSE_CLEN_FIXED(en))
|
---|
2491 | *len = en->char_len;
|
---|
2492 | else {
|
---|
2493 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
2494 | if (r == 0) {
|
---|
2495 | en->char_len = *len;
|
---|
2496 | SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
|
---|
2497 | }
|
---|
2498 | }
|
---|
2499 | break;
|
---|
2500 | #endif
|
---|
2501 | case ENCLOSE_OPTION:
|
---|
2502 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2503 | case ENCLOSE_CONDITION:
|
---|
2504 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
2505 | break;
|
---|
2506 | default:
|
---|
2507 | break;
|
---|
2508 | }
|
---|
2509 | }
|
---|
2510 | break;
|
---|
2511 |
|
---|
2512 | case NT_ANCHOR:
|
---|
2513 | break;
|
---|
2514 |
|
---|
2515 | default:
|
---|
2516 | r = GET_CHAR_LEN_VARLEN;
|
---|
2517 | break;
|
---|
2518 | }
|
---|
2519 |
|
---|
2520 | return r;
|
---|
2521 | }
|
---|
2522 |
|
---|
2523 | static int
|
---|
2524 | get_char_length_tree(Node* node, regex_t* reg, int* len)
|
---|
2525 | {
|
---|
2526 | return get_char_length_tree1(node, reg, len, 0);
|
---|
2527 | }
|
---|
2528 |
|
---|
2529 | /* x is not included y ==> 1 : 0 */
|
---|
2530 | static int
|
---|
2531 | is_not_included(Node* x, Node* y, regex_t* reg)
|
---|
2532 | {
|
---|
2533 | int i;
|
---|
2534 | OnigDistance len;
|
---|
2535 | OnigCodePoint code;
|
---|
2536 | UChar *p;
|
---|
2537 | int ytype;
|
---|
2538 |
|
---|
2539 | retry:
|
---|
2540 | ytype = NTYPE(y);
|
---|
2541 | switch (NTYPE(x)) {
|
---|
2542 | case NT_CTYPE:
|
---|
2543 | {
|
---|
2544 | switch (ytype) {
|
---|
2545 | case NT_CTYPE:
|
---|
2546 | if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
|
---|
2547 | NCTYPE(y)->not != NCTYPE(x)->not &&
|
---|
2548 | NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
|
---|
2549 | return 1;
|
---|
2550 | else
|
---|
2551 | return 0;
|
---|
2552 | break;
|
---|
2553 |
|
---|
2554 | case NT_CCLASS:
|
---|
2555 | swap:
|
---|
2556 | {
|
---|
2557 | Node* tmp;
|
---|
2558 | tmp = x; x = y; y = tmp;
|
---|
2559 | goto retry;
|
---|
2560 | }
|
---|
2561 | break;
|
---|
2562 |
|
---|
2563 | case NT_STR:
|
---|
2564 | goto swap;
|
---|
2565 | break;
|
---|
2566 |
|
---|
2567 | default:
|
---|
2568 | break;
|
---|
2569 | }
|
---|
2570 | }
|
---|
2571 | break;
|
---|
2572 |
|
---|
2573 | case NT_CCLASS:
|
---|
2574 | {
|
---|
2575 | CClassNode* xc = NCCLASS(x);
|
---|
2576 | switch (ytype) {
|
---|
2577 | case NT_CTYPE:
|
---|
2578 | switch (NCTYPE(y)->ctype) {
|
---|
2579 | case ONIGENC_CTYPE_WORD:
|
---|
2580 | if (NCTYPE(y)->not == 0) {
|
---|
2581 | if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
|
---|
2582 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
2583 | if (BITSET_AT(xc->bs, i)) {
|
---|
2584 | if (NCTYPE(y)->ascii_range) {
|
---|
2585 | if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
|
---|
2586 | }
|
---|
2587 | else {
|
---|
2588 | if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
|
---|
2589 | }
|
---|
2590 | }
|
---|
2591 | }
|
---|
2592 | return 1;
|
---|
2593 | }
|
---|
2594 | return 0;
|
---|
2595 | }
|
---|
2596 | else {
|
---|
2597 | if (IS_NOT_NULL(xc->mbuf)) return 0;
|
---|
2598 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
2599 | int is_word;
|
---|
2600 | if (NCTYPE(y)->ascii_range)
|
---|
2601 | is_word = IS_CODE_SB_WORD(reg->enc, i);
|
---|
2602 | else
|
---|
2603 | is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
|
---|
2604 | if (! is_word) {
|
---|
2605 | if (!IS_NCCLASS_NOT(xc)) {
|
---|
2606 | if (BITSET_AT(xc->bs, i))
|
---|
2607 | return 0;
|
---|
2608 | }
|
---|
2609 | else {
|
---|
2610 | if (! BITSET_AT(xc->bs, i))
|
---|
2611 | return 0;
|
---|
2612 | }
|
---|
2613 | }
|
---|
2614 | }
|
---|
2615 | return 1;
|
---|
2616 | }
|
---|
2617 | break;
|
---|
2618 |
|
---|
2619 | default:
|
---|
2620 | break;
|
---|
2621 | }
|
---|
2622 | break;
|
---|
2623 |
|
---|
2624 | case NT_CCLASS:
|
---|
2625 | {
|
---|
2626 | int v;
|
---|
2627 | CClassNode* yc = NCCLASS(y);
|
---|
2628 |
|
---|
2629 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
2630 | v = BITSET_AT(xc->bs, i);
|
---|
2631 | if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
|
---|
2632 | (v == 0 && IS_NCCLASS_NOT(xc))) {
|
---|
2633 | v = BITSET_AT(yc->bs, i);
|
---|
2634 | if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
|
---|
2635 | (v == 0 && IS_NCCLASS_NOT(yc)))
|
---|
2636 | return 0;
|
---|
2637 | }
|
---|
2638 | }
|
---|
2639 | if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
|
---|
2640 | (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
|
---|
2641 | return 1;
|
---|
2642 | return 0;
|
---|
2643 | }
|
---|
2644 | break;
|
---|
2645 |
|
---|
2646 | case NT_STR:
|
---|
2647 | goto swap;
|
---|
2648 | break;
|
---|
2649 |
|
---|
2650 | default:
|
---|
2651 | break;
|
---|
2652 | }
|
---|
2653 | }
|
---|
2654 | break;
|
---|
2655 |
|
---|
2656 | case NT_STR:
|
---|
2657 | {
|
---|
2658 | StrNode* xs = NSTR(x);
|
---|
2659 | if (NSTRING_LEN(x) == 0)
|
---|
2660 | break;
|
---|
2661 |
|
---|
2662 | switch (ytype) {
|
---|
2663 | case NT_CTYPE:
|
---|
2664 | switch (NCTYPE(y)->ctype) {
|
---|
2665 | case ONIGENC_CTYPE_WORD:
|
---|
2666 | if (NCTYPE(y)->ascii_range) {
|
---|
2667 | if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
|
---|
2668 | return NCTYPE(y)->not;
|
---|
2669 | else
|
---|
2670 | return !(NCTYPE(y)->not);
|
---|
2671 | }
|
---|
2672 | else {
|
---|
2673 | if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
|
---|
2674 | return NCTYPE(y)->not;
|
---|
2675 | else
|
---|
2676 | return !(NCTYPE(y)->not);
|
---|
2677 | }
|
---|
2678 | break;
|
---|
2679 | default:
|
---|
2680 | break;
|
---|
2681 | }
|
---|
2682 | break;
|
---|
2683 |
|
---|
2684 | case NT_CCLASS:
|
---|
2685 | {
|
---|
2686 | CClassNode* cc = NCCLASS(y);
|
---|
2687 |
|
---|
2688 | code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
|
---|
2689 | xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
|
---|
2690 | return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
|
---|
2691 | }
|
---|
2692 | break;
|
---|
2693 |
|
---|
2694 | case NT_STR:
|
---|
2695 | {
|
---|
2696 | UChar *q;
|
---|
2697 | StrNode* ys = NSTR(y);
|
---|
2698 | len = NSTRING_LEN(x);
|
---|
2699 | if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
|
---|
2700 | if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
|
---|
2701 | /* tiny version */
|
---|
2702 | return 0;
|
---|
2703 | }
|
---|
2704 | else {
|
---|
2705 | for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
|
---|
2706 | if (*p != *q) return 1;
|
---|
2707 | }
|
---|
2708 | }
|
---|
2709 | }
|
---|
2710 | break;
|
---|
2711 |
|
---|
2712 | default:
|
---|
2713 | break;
|
---|
2714 | }
|
---|
2715 | }
|
---|
2716 | break;
|
---|
2717 |
|
---|
2718 | default:
|
---|
2719 | break;
|
---|
2720 | }
|
---|
2721 |
|
---|
2722 | return 0;
|
---|
2723 | }
|
---|
2724 |
|
---|
2725 | static Node*
|
---|
2726 | get_head_value_node(Node* node, int exact, regex_t* reg)
|
---|
2727 | {
|
---|
2728 | Node* n = NULL_NODE;
|
---|
2729 |
|
---|
2730 | switch (NTYPE(node)) {
|
---|
2731 | case NT_BREF:
|
---|
2732 | case NT_ALT:
|
---|
2733 | case NT_CANY:
|
---|
2734 | #ifdef USE_SUBEXP_CALL
|
---|
2735 | case NT_CALL:
|
---|
2736 | #endif
|
---|
2737 | break;
|
---|
2738 |
|
---|
2739 | case NT_CTYPE:
|
---|
2740 | case NT_CCLASS:
|
---|
2741 | if (exact == 0) {
|
---|
2742 | n = node;
|
---|
2743 | }
|
---|
2744 | break;
|
---|
2745 |
|
---|
2746 | case NT_LIST:
|
---|
2747 | n = get_head_value_node(NCAR(node), exact, reg);
|
---|
2748 | break;
|
---|
2749 |
|
---|
2750 | case NT_STR:
|
---|
2751 | {
|
---|
2752 | StrNode* sn = NSTR(node);
|
---|
2753 |
|
---|
2754 | if (sn->end <= sn->s)
|
---|
2755 | break;
|
---|
2756 |
|
---|
2757 | if (exact != 0 &&
|
---|
2758 | !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
|
---|
2759 | }
|
---|
2760 | else {
|
---|
2761 | n = node;
|
---|
2762 | }
|
---|
2763 | }
|
---|
2764 | break;
|
---|
2765 |
|
---|
2766 | case NT_QTFR:
|
---|
2767 | {
|
---|
2768 | QtfrNode* qn = NQTFR(node);
|
---|
2769 | if (qn->lower > 0) {
|
---|
2770 | if (IS_NOT_NULL(qn->head_exact))
|
---|
2771 | n = qn->head_exact;
|
---|
2772 | else
|
---|
2773 | n = get_head_value_node(qn->target, exact, reg);
|
---|
2774 | }
|
---|
2775 | }
|
---|
2776 | break;
|
---|
2777 |
|
---|
2778 | case NT_ENCLOSE:
|
---|
2779 | {
|
---|
2780 | EncloseNode* en = NENCLOSE(node);
|
---|
2781 | switch (en->type) {
|
---|
2782 | case ENCLOSE_OPTION:
|
---|
2783 | {
|
---|
2784 | OnigOptionType options = reg->options;
|
---|
2785 |
|
---|
2786 | reg->options = NENCLOSE(node)->option;
|
---|
2787 | n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
|
---|
2788 | reg->options = options;
|
---|
2789 | }
|
---|
2790 | break;
|
---|
2791 |
|
---|
2792 | case ENCLOSE_MEMORY:
|
---|
2793 | case ENCLOSE_STOP_BACKTRACK:
|
---|
2794 | case ENCLOSE_CONDITION:
|
---|
2795 | n = get_head_value_node(en->target, exact, reg);
|
---|
2796 | break;
|
---|
2797 | }
|
---|
2798 | }
|
---|
2799 | break;
|
---|
2800 |
|
---|
2801 | case NT_ANCHOR:
|
---|
2802 | if (NANCHOR(node)->type == ANCHOR_PREC_READ)
|
---|
2803 | n = get_head_value_node(NANCHOR(node)->target, exact, reg);
|
---|
2804 | break;
|
---|
2805 |
|
---|
2806 | default:
|
---|
2807 | break;
|
---|
2808 | }
|
---|
2809 |
|
---|
2810 | return n;
|
---|
2811 | }
|
---|
2812 |
|
---|
2813 | static int
|
---|
2814 | check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
|
---|
2815 | {
|
---|
2816 | int type, r = 0;
|
---|
2817 |
|
---|
2818 | type = NTYPE(node);
|
---|
2819 | if ((NTYPE2BIT(type) & type_mask) == 0)
|
---|
2820 | return 1;
|
---|
2821 |
|
---|
2822 | switch (type) {
|
---|
2823 | case NT_LIST:
|
---|
2824 | case NT_ALT:
|
---|
2825 | do {
|
---|
2826 | r = check_type_tree(NCAR(node), type_mask, enclose_mask,
|
---|
2827 | anchor_mask);
|
---|
2828 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2829 | break;
|
---|
2830 |
|
---|
2831 | case NT_QTFR:
|
---|
2832 | r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
|
---|
2833 | anchor_mask);
|
---|
2834 | break;
|
---|
2835 |
|
---|
2836 | case NT_ENCLOSE:
|
---|
2837 | {
|
---|
2838 | EncloseNode* en = NENCLOSE(node);
|
---|
2839 | if ((en->type & enclose_mask) == 0)
|
---|
2840 | return 1;
|
---|
2841 |
|
---|
2842 | r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
|
---|
2843 | }
|
---|
2844 | break;
|
---|
2845 |
|
---|
2846 | case NT_ANCHOR:
|
---|
2847 | type = NANCHOR(node)->type;
|
---|
2848 | if ((type & anchor_mask) == 0)
|
---|
2849 | return 1;
|
---|
2850 |
|
---|
2851 | if (NANCHOR(node)->target)
|
---|
2852 | r = check_type_tree(NANCHOR(node)->target,
|
---|
2853 | type_mask, enclose_mask, anchor_mask);
|
---|
2854 | break;
|
---|
2855 |
|
---|
2856 | default:
|
---|
2857 | break;
|
---|
2858 | }
|
---|
2859 | return r;
|
---|
2860 | }
|
---|
2861 |
|
---|
2862 | #ifdef USE_SUBEXP_CALL
|
---|
2863 |
|
---|
2864 | #define RECURSION_EXIST 1
|
---|
2865 | #define RECURSION_INFINITE 2
|
---|
2866 |
|
---|
2867 | static int
|
---|
2868 | subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
|
---|
2869 | {
|
---|
2870 | int type;
|
---|
2871 | int r = 0;
|
---|
2872 |
|
---|
2873 | type = NTYPE(node);
|
---|
2874 | switch (type) {
|
---|
2875 | case NT_LIST:
|
---|
2876 | {
|
---|
2877 | Node *x;
|
---|
2878 | OnigDistance min;
|
---|
2879 | int ret;
|
---|
2880 |
|
---|
2881 | x = node;
|
---|
2882 | do {
|
---|
2883 | ret = subexp_inf_recursive_check(NCAR(x), env, head);
|
---|
2884 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
2885 | r |= ret;
|
---|
2886 | if (head) {
|
---|
2887 | ret = get_min_match_length(NCAR(x), &min, env);
|
---|
2888 | if (ret != 0) return ret;
|
---|
2889 | if (min != 0) head = 0;
|
---|
2890 | }
|
---|
2891 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
2892 | }
|
---|
2893 | break;
|
---|
2894 |
|
---|
2895 | case NT_ALT:
|
---|
2896 | {
|
---|
2897 | int ret;
|
---|
2898 | r = RECURSION_EXIST;
|
---|
2899 | do {
|
---|
2900 | ret = subexp_inf_recursive_check(NCAR(node), env, head);
|
---|
2901 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
2902 | r &= ret;
|
---|
2903 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
2904 | }
|
---|
2905 | break;
|
---|
2906 |
|
---|
2907 | case NT_QTFR:
|
---|
2908 | r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
|
---|
2909 | if (r == RECURSION_EXIST) {
|
---|
2910 | if (NQTFR(node)->lower == 0) r = 0;
|
---|
2911 | }
|
---|
2912 | break;
|
---|
2913 |
|
---|
2914 | case NT_ANCHOR:
|
---|
2915 | {
|
---|
2916 | AnchorNode* an = NANCHOR(node);
|
---|
2917 | switch (an->type) {
|
---|
2918 | case ANCHOR_PREC_READ:
|
---|
2919 | case ANCHOR_PREC_READ_NOT:
|
---|
2920 | case ANCHOR_LOOK_BEHIND:
|
---|
2921 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
2922 | r = subexp_inf_recursive_check(an->target, env, head);
|
---|
2923 | break;
|
---|
2924 | }
|
---|
2925 | }
|
---|
2926 | break;
|
---|
2927 |
|
---|
2928 | case NT_CALL:
|
---|
2929 | r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
|
---|
2930 | break;
|
---|
2931 |
|
---|
2932 | case NT_ENCLOSE:
|
---|
2933 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
2934 | return 0;
|
---|
2935 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
2936 | return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
|
---|
2937 | else {
|
---|
2938 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
2939 | r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
|
---|
2940 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
2941 | }
|
---|
2942 | break;
|
---|
2943 |
|
---|
2944 | default:
|
---|
2945 | break;
|
---|
2946 | }
|
---|
2947 |
|
---|
2948 | return r;
|
---|
2949 | }
|
---|
2950 |
|
---|
2951 | static int
|
---|
2952 | subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
2953 | {
|
---|
2954 | int type;
|
---|
2955 | int r = 0;
|
---|
2956 |
|
---|
2957 | type = NTYPE(node);
|
---|
2958 | switch (type) {
|
---|
2959 | case NT_LIST:
|
---|
2960 | case NT_ALT:
|
---|
2961 | do {
|
---|
2962 | r = subexp_inf_recursive_check_trav(NCAR(node), env);
|
---|
2963 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
2964 | break;
|
---|
2965 |
|
---|
2966 | case NT_QTFR:
|
---|
2967 | r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
|
---|
2968 | break;
|
---|
2969 |
|
---|
2970 | case NT_ANCHOR:
|
---|
2971 | {
|
---|
2972 | AnchorNode* an = NANCHOR(node);
|
---|
2973 | switch (an->type) {
|
---|
2974 | case ANCHOR_PREC_READ:
|
---|
2975 | case ANCHOR_PREC_READ_NOT:
|
---|
2976 | case ANCHOR_LOOK_BEHIND:
|
---|
2977 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
2978 | r = subexp_inf_recursive_check_trav(an->target, env);
|
---|
2979 | break;
|
---|
2980 | }
|
---|
2981 | }
|
---|
2982 | break;
|
---|
2983 |
|
---|
2984 | case NT_ENCLOSE:
|
---|
2985 | {
|
---|
2986 | EncloseNode* en = NENCLOSE(node);
|
---|
2987 |
|
---|
2988 | if (IS_ENCLOSE_RECURSION(en)) {
|
---|
2989 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
2990 | r = subexp_inf_recursive_check(en->target, env, 1);
|
---|
2991 | if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
|
---|
2992 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
2993 | }
|
---|
2994 | r = subexp_inf_recursive_check_trav(en->target, env);
|
---|
2995 | }
|
---|
2996 |
|
---|
2997 | break;
|
---|
2998 |
|
---|
2999 | default:
|
---|
3000 | break;
|
---|
3001 | }
|
---|
3002 |
|
---|
3003 | return r;
|
---|
3004 | }
|
---|
3005 |
|
---|
3006 | static int
|
---|
3007 | subexp_recursive_check(Node* node)
|
---|
3008 | {
|
---|
3009 | int r = 0;
|
---|
3010 |
|
---|
3011 | switch (NTYPE(node)) {
|
---|
3012 | case NT_LIST:
|
---|
3013 | case NT_ALT:
|
---|
3014 | do {
|
---|
3015 | r |= subexp_recursive_check(NCAR(node));
|
---|
3016 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
3017 | break;
|
---|
3018 |
|
---|
3019 | case NT_QTFR:
|
---|
3020 | r = subexp_recursive_check(NQTFR(node)->target);
|
---|
3021 | break;
|
---|
3022 |
|
---|
3023 | case NT_ANCHOR:
|
---|
3024 | {
|
---|
3025 | AnchorNode* an = NANCHOR(node);
|
---|
3026 | switch (an->type) {
|
---|
3027 | case ANCHOR_PREC_READ:
|
---|
3028 | case ANCHOR_PREC_READ_NOT:
|
---|
3029 | case ANCHOR_LOOK_BEHIND:
|
---|
3030 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
3031 | r = subexp_recursive_check(an->target);
|
---|
3032 | break;
|
---|
3033 | }
|
---|
3034 | }
|
---|
3035 | break;
|
---|
3036 |
|
---|
3037 | case NT_CALL:
|
---|
3038 | r = subexp_recursive_check(NCALL(node)->target);
|
---|
3039 | if (r != 0) SET_CALL_RECURSION(node);
|
---|
3040 | break;
|
---|
3041 |
|
---|
3042 | case NT_ENCLOSE:
|
---|
3043 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
3044 | return 0;
|
---|
3045 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
3046 | return 1; /* recursion */
|
---|
3047 | else {
|
---|
3048 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
3049 | r = subexp_recursive_check(NENCLOSE(node)->target);
|
---|
3050 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
3051 | }
|
---|
3052 | break;
|
---|
3053 |
|
---|
3054 | default:
|
---|
3055 | break;
|
---|
3056 | }
|
---|
3057 |
|
---|
3058 | return r;
|
---|
3059 | }
|
---|
3060 |
|
---|
3061 |
|
---|
3062 | static int
|
---|
3063 | subexp_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
3064 | {
|
---|
3065 | #define FOUND_CALLED_NODE 1
|
---|
3066 |
|
---|
3067 | int type;
|
---|
3068 | int r = 0;
|
---|
3069 |
|
---|
3070 | type = NTYPE(node);
|
---|
3071 | switch (type) {
|
---|
3072 | case NT_LIST:
|
---|
3073 | case NT_ALT:
|
---|
3074 | {
|
---|
3075 | int ret;
|
---|
3076 | do {
|
---|
3077 | ret = subexp_recursive_check_trav(NCAR(node), env);
|
---|
3078 | if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
|
---|
3079 | else if (ret < 0) return ret;
|
---|
3080 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
3081 | }
|
---|
3082 | break;
|
---|
3083 |
|
---|
3084 | case NT_QTFR:
|
---|
3085 | r = subexp_recursive_check_trav(NQTFR(node)->target, env);
|
---|
3086 | if (NQTFR(node)->upper == 0) {
|
---|
3087 | if (r == FOUND_CALLED_NODE)
|
---|
3088 | NQTFR(node)->is_refered = 1;
|
---|
3089 | }
|
---|
3090 | break;
|
---|
3091 |
|
---|
3092 | case NT_ANCHOR:
|
---|
3093 | {
|
---|
3094 | AnchorNode* an = NANCHOR(node);
|
---|
3095 | switch (an->type) {
|
---|
3096 | case ANCHOR_PREC_READ:
|
---|
3097 | case ANCHOR_PREC_READ_NOT:
|
---|
3098 | case ANCHOR_LOOK_BEHIND:
|
---|
3099 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
3100 | r = subexp_recursive_check_trav(an->target, env);
|
---|
3101 | break;
|
---|
3102 | }
|
---|
3103 | }
|
---|
3104 | break;
|
---|
3105 |
|
---|
3106 | case NT_ENCLOSE:
|
---|
3107 | {
|
---|
3108 | EncloseNode* en = NENCLOSE(node);
|
---|
3109 |
|
---|
3110 | if (! IS_ENCLOSE_RECURSION(en)) {
|
---|
3111 | if (IS_ENCLOSE_CALLED(en)) {
|
---|
3112 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
3113 | r = subexp_recursive_check(en->target);
|
---|
3114 | if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
|
---|
3115 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
3116 | }
|
---|
3117 | }
|
---|
3118 | r = subexp_recursive_check_trav(en->target, env);
|
---|
3119 | if (IS_ENCLOSE_CALLED(en))
|
---|
3120 | r |= FOUND_CALLED_NODE;
|
---|
3121 | }
|
---|
3122 | break;
|
---|
3123 |
|
---|
3124 | default:
|
---|
3125 | break;
|
---|
3126 | }
|
---|
3127 |
|
---|
3128 | return r;
|
---|
3129 | }
|
---|
3130 |
|
---|
3131 | static int
|
---|
3132 | setup_subexp_call(Node* node, ScanEnv* env)
|
---|
3133 | {
|
---|
3134 | int type;
|
---|
3135 | int r = 0;
|
---|
3136 |
|
---|
3137 | type = NTYPE(node);
|
---|
3138 | switch (type) {
|
---|
3139 | case NT_LIST:
|
---|
3140 | do {
|
---|
3141 | r = setup_subexp_call(NCAR(node), env);
|
---|
3142 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3143 | break;
|
---|
3144 |
|
---|
3145 | case NT_ALT:
|
---|
3146 | do {
|
---|
3147 | r = setup_subexp_call(NCAR(node), env);
|
---|
3148 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3149 | break;
|
---|
3150 |
|
---|
3151 | case NT_QTFR:
|
---|
3152 | r = setup_subexp_call(NQTFR(node)->target, env);
|
---|
3153 | break;
|
---|
3154 | case NT_ENCLOSE:
|
---|
3155 | r = setup_subexp_call(NENCLOSE(node)->target, env);
|
---|
3156 | break;
|
---|
3157 |
|
---|
3158 | case NT_CALL:
|
---|
3159 | {
|
---|
3160 | CallNode* cn = NCALL(node);
|
---|
3161 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
3162 |
|
---|
3163 | if (cn->group_num != 0) {
|
---|
3164 | int gnum = cn->group_num;
|
---|
3165 |
|
---|
3166 | #ifdef USE_NAMED_GROUP
|
---|
3167 | if (env->num_named > 0 &&
|
---|
3168 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
3169 | !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
3170 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
3171 | }
|
---|
3172 | #endif
|
---|
3173 | if (gnum > env->num_mem) {
|
---|
3174 | onig_scan_env_set_error_string(env,
|
---|
3175 | ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
|
---|
3176 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;
|
---|
3177 | }
|
---|
3178 |
|
---|
3179 | #ifdef USE_NAMED_GROUP
|
---|
3180 | set_call_attr:
|
---|
3181 | #endif
|
---|
3182 | cn->target = nodes[cn->group_num];
|
---|
3183 | if (IS_NULL(cn->target)) {
|
---|
3184 | onig_scan_env_set_error_string(env,
|
---|
3185 | ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
|
---|
3186 | return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
---|
3187 | }
|
---|
3188 | SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
|
---|
3189 | BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
|
---|
3190 | cn->unset_addr_list = env->unset_addr_list;
|
---|
3191 | }
|
---|
3192 | #ifdef USE_NAMED_GROUP
|
---|
3193 | #ifdef USE_PERL_SUBEXP_CALL
|
---|
3194 | else if (cn->name == cn->name_end) {
|
---|
3195 | goto set_call_attr;
|
---|
3196 | }
|
---|
3197 | #endif
|
---|
3198 | else {
|
---|
3199 | int *refs;
|
---|
3200 |
|
---|
3201 | int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
|
---|
3202 | &refs);
|
---|
3203 | if (n <= 0) {
|
---|
3204 | onig_scan_env_set_error_string(env,
|
---|
3205 | ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
|
---|
3206 | return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
---|
3207 | }
|
---|
3208 | else if (n > 1 &&
|
---|
3209 | ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
|
---|
3210 | onig_scan_env_set_error_string(env,
|
---|
3211 | ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
|
---|
3212 | return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
|
---|
3213 | }
|
---|
3214 | else {
|
---|
3215 | cn->group_num = refs[0];
|
---|
3216 | goto set_call_attr;
|
---|
3217 | }
|
---|
3218 | }
|
---|
3219 | #endif
|
---|
3220 | }
|
---|
3221 | break;
|
---|
3222 |
|
---|
3223 | case NT_ANCHOR:
|
---|
3224 | {
|
---|
3225 | AnchorNode* an = NANCHOR(node);
|
---|
3226 |
|
---|
3227 | switch (an->type) {
|
---|
3228 | case ANCHOR_PREC_READ:
|
---|
3229 | case ANCHOR_PREC_READ_NOT:
|
---|
3230 | case ANCHOR_LOOK_BEHIND:
|
---|
3231 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
3232 | r = setup_subexp_call(an->target, env);
|
---|
3233 | break;
|
---|
3234 | }
|
---|
3235 | }
|
---|
3236 | break;
|
---|
3237 |
|
---|
3238 | default:
|
---|
3239 | break;
|
---|
3240 | }
|
---|
3241 |
|
---|
3242 | return r;
|
---|
3243 | }
|
---|
3244 | #endif
|
---|
3245 |
|
---|
3246 | /* divide different length alternatives in look-behind.
|
---|
3247 | (?<=A|B) ==> (?<=A)|(?<=B)
|
---|
3248 | (?<!A|B) ==> (?<!A)(?<!B)
|
---|
3249 | */
|
---|
3250 | static int
|
---|
3251 | divide_look_behind_alternatives(Node* node)
|
---|
3252 | {
|
---|
3253 | Node *head, *np, *insert_node;
|
---|
3254 | AnchorNode* an = NANCHOR(node);
|
---|
3255 | int anc_type = an->type;
|
---|
3256 |
|
---|
3257 | head = an->target;
|
---|
3258 | np = NCAR(head);
|
---|
3259 | swap_node(node, head);
|
---|
3260 | NCAR(node) = head;
|
---|
3261 | NANCHOR(head)->target = np;
|
---|
3262 |
|
---|
3263 | np = node;
|
---|
3264 | while ((np = NCDR(np)) != NULL_NODE) {
|
---|
3265 | insert_node = onig_node_new_anchor(anc_type);
|
---|
3266 | CHECK_NULL_RETURN_MEMERR(insert_node);
|
---|
3267 | NANCHOR(insert_node)->target = NCAR(np);
|
---|
3268 | NCAR(np) = insert_node;
|
---|
3269 | }
|
---|
3270 |
|
---|
3271 | if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
|
---|
3272 | np = node;
|
---|
3273 | do {
|
---|
3274 | SET_NTYPE(np, NT_LIST); /* alt -> list */
|
---|
3275 | } while ((np = NCDR(np)) != NULL_NODE);
|
---|
3276 | }
|
---|
3277 | return 0;
|
---|
3278 | }
|
---|
3279 |
|
---|
3280 | static int
|
---|
3281 | setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
|
---|
3282 | {
|
---|
3283 | int r, len;
|
---|
3284 | AnchorNode* an = NANCHOR(node);
|
---|
3285 |
|
---|
3286 | r = get_char_length_tree(an->target, reg, &len);
|
---|
3287 | if (r == 0)
|
---|
3288 | an->char_len = len;
|
---|
3289 | else if (r == GET_CHAR_LEN_VARLEN)
|
---|
3290 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
3291 | else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
|
---|
3292 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
|
---|
3293 | r = divide_look_behind_alternatives(node);
|
---|
3294 | else
|
---|
3295 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
3296 | }
|
---|
3297 |
|
---|
3298 | return r;
|
---|
3299 | }
|
---|
3300 |
|
---|
3301 | static int
|
---|
3302 | next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
|
---|
3303 | {
|
---|
3304 | int type;
|
---|
3305 |
|
---|
3306 | retry:
|
---|
3307 | type = NTYPE(node);
|
---|
3308 | if (type == NT_QTFR) {
|
---|
3309 | QtfrNode* qn = NQTFR(node);
|
---|
3310 | if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
|
---|
3311 | #ifdef USE_QTFR_PEEK_NEXT
|
---|
3312 | Node* n = get_head_value_node(next_node, 1, reg);
|
---|
3313 | /* '\0': for UTF-16BE etc... */
|
---|
3314 | if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
|
---|
3315 | qn->next_head_exact = n;
|
---|
3316 | }
|
---|
3317 | #endif
|
---|
3318 | /* automatic possessification a*b ==> (?>a*)b */
|
---|
3319 | if (qn->lower <= 1) {
|
---|
3320 | int ttype = NTYPE(qn->target);
|
---|
3321 | if (IS_NODE_TYPE_SIMPLE(ttype)) {
|
---|
3322 | Node *x, *y;
|
---|
3323 | x = get_head_value_node(qn->target, 0, reg);
|
---|
3324 | if (IS_NOT_NULL(x)) {
|
---|
3325 | y = get_head_value_node(next_node, 0, reg);
|
---|
3326 | if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
|
---|
3327 | Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
|
---|
3328 | CHECK_NULL_RETURN_MEMERR(en);
|
---|
3329 | SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
|
---|
3330 | swap_node(node, en);
|
---|
3331 | NENCLOSE(node)->target = en;
|
---|
3332 | }
|
---|
3333 | }
|
---|
3334 | }
|
---|
3335 | }
|
---|
3336 |
|
---|
3337 | #ifndef ONIG_DONT_OPTIMIZE
|
---|
3338 | if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
|
---|
3339 | in_root && /* qn->lower == 0 && */
|
---|
3340 | NTYPE(qn->target) == NT_CANY &&
|
---|
3341 | ! IS_MULTILINE(reg->options)) {
|
---|
3342 | /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
|
---|
3343 | Node *np;
|
---|
3344 | np = onig_node_new_list(NULL_NODE, NULL_NODE);
|
---|
3345 | CHECK_NULL_RETURN_MEMERR(np);
|
---|
3346 | swap_node(node, np);
|
---|
3347 | NCDR(node) = onig_node_new_list(np, NULL_NODE);
|
---|
3348 | if (IS_NULL(NCDR(node))) {
|
---|
3349 | onig_node_free(np);
|
---|
3350 | return ONIGERR_MEMORY;
|
---|
3351 | }
|
---|
3352 | np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
|
---|
3353 | CHECK_NULL_RETURN_MEMERR(np);
|
---|
3354 | NCAR(node) = np;
|
---|
3355 | }
|
---|
3356 | #endif
|
---|
3357 | }
|
---|
3358 | }
|
---|
3359 | else if (type == NT_ENCLOSE) {
|
---|
3360 | EncloseNode* en = NENCLOSE(node);
|
---|
3361 | in_root = 0;
|
---|
3362 | if (en->type == ENCLOSE_MEMORY) {
|
---|
3363 | node = en->target;
|
---|
3364 | goto retry;
|
---|
3365 | }
|
---|
3366 | }
|
---|
3367 | return 0;
|
---|
3368 | }
|
---|
3369 |
|
---|
3370 |
|
---|
3371 | static int
|
---|
3372 | update_string_node_case_fold(regex_t* reg, Node *node)
|
---|
3373 | {
|
---|
3374 | UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
3375 | UChar *sbuf, *ebuf, *sp;
|
---|
3376 | int r, i, len;
|
---|
3377 | OnigDistance sbuf_size;
|
---|
3378 | StrNode* sn = NSTR(node);
|
---|
3379 |
|
---|
3380 | end = sn->end;
|
---|
3381 | sbuf_size = (end - sn->s) * 2;
|
---|
3382 | sbuf = (UChar* )xmalloc(sbuf_size);
|
---|
3383 | CHECK_NULL_RETURN_MEMERR(sbuf);
|
---|
3384 | ebuf = sbuf + sbuf_size;
|
---|
3385 |
|
---|
3386 | sp = sbuf;
|
---|
3387 | p = sn->s;
|
---|
3388 | while (p < end) {
|
---|
3389 | len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
|
---|
3390 | for (i = 0; i < len; i++) {
|
---|
3391 | if (sp >= ebuf) {
|
---|
3392 | UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
|
---|
3393 | if (IS_NULL(p)) {
|
---|
3394 | xfree(sbuf);
|
---|
3395 | return ONIGERR_MEMORY;
|
---|
3396 | }
|
---|
3397 | sbuf = p;
|
---|
3398 | sp = sbuf + sbuf_size;
|
---|
3399 | sbuf_size *= 2;
|
---|
3400 | ebuf = sbuf + sbuf_size;
|
---|
3401 | }
|
---|
3402 |
|
---|
3403 | *sp++ = buf[i];
|
---|
3404 | }
|
---|
3405 | }
|
---|
3406 |
|
---|
3407 | r = onig_node_str_set(node, sbuf, sp);
|
---|
3408 |
|
---|
3409 | xfree(sbuf);
|
---|
3410 | return r;
|
---|
3411 | }
|
---|
3412 |
|
---|
3413 | static int
|
---|
3414 | expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
|
---|
3415 | regex_t* reg)
|
---|
3416 | {
|
---|
3417 | int r;
|
---|
3418 | Node *node;
|
---|
3419 |
|
---|
3420 | node = onig_node_new_str(s, end);
|
---|
3421 | if (IS_NULL(node)) return ONIGERR_MEMORY;
|
---|
3422 |
|
---|
3423 | r = update_string_node_case_fold(reg, node);
|
---|
3424 | if (r != 0) {
|
---|
3425 | onig_node_free(node);
|
---|
3426 | return r;
|
---|
3427 | }
|
---|
3428 |
|
---|
3429 | NSTRING_SET_AMBIG(node);
|
---|
3430 | NSTRING_SET_DONT_GET_OPT_INFO(node);
|
---|
3431 | *rnode = node;
|
---|
3432 | return 0;
|
---|
3433 | }
|
---|
3434 |
|
---|
3435 | static int
|
---|
3436 | is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
|
---|
3437 | int slen)
|
---|
3438 | {
|
---|
3439 | int i;
|
---|
3440 |
|
---|
3441 | for (i = 0; i < item_num; i++) {
|
---|
3442 | if (items[i].byte_len != slen) {
|
---|
3443 | return 1;
|
---|
3444 | }
|
---|
3445 | if (items[i].code_len != 1) {
|
---|
3446 | return 1;
|
---|
3447 | }
|
---|
3448 | }
|
---|
3449 | return 0;
|
---|
3450 | }
|
---|
3451 |
|
---|
3452 | static int
|
---|
3453 | expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
---|
3454 | UChar *p, int slen, UChar *end,
|
---|
3455 | regex_t* reg, Node **rnode)
|
---|
3456 | {
|
---|
3457 | int r, i, j, len, varlen;
|
---|
3458 | Node *anode, *var_anode, *snode, *xnode, *an;
|
---|
3459 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
---|
3460 |
|
---|
3461 | *rnode = var_anode = NULL_NODE;
|
---|
3462 |
|
---|
3463 | varlen = 0;
|
---|
3464 | for (i = 0; i < item_num; i++) {
|
---|
3465 | if (items[i].byte_len != slen) {
|
---|
3466 | varlen = 1;
|
---|
3467 | break;
|
---|
3468 | }
|
---|
3469 | }
|
---|
3470 |
|
---|
3471 | if (varlen != 0) {
|
---|
3472 | *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3473 | if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
|
---|
3474 |
|
---|
3475 | xnode = onig_node_new_list(NULL, NULL);
|
---|
3476 | if (IS_NULL(xnode)) goto mem_err;
|
---|
3477 | NCAR(var_anode) = xnode;
|
---|
3478 |
|
---|
3479 | anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3480 | if (IS_NULL(anode)) goto mem_err;
|
---|
3481 | NCAR(xnode) = anode;
|
---|
3482 | }
|
---|
3483 | else {
|
---|
3484 | *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3485 | if (IS_NULL(anode)) return ONIGERR_MEMORY;
|
---|
3486 | }
|
---|
3487 |
|
---|
3488 | snode = onig_node_new_str(p, p + slen);
|
---|
3489 | if (IS_NULL(snode)) goto mem_err;
|
---|
3490 |
|
---|
3491 | NCAR(anode) = snode;
|
---|
3492 |
|
---|
3493 | for (i = 0; i < item_num; i++) {
|
---|
3494 | snode = onig_node_new_str(NULL, NULL);
|
---|
3495 | if (IS_NULL(snode)) goto mem_err;
|
---|
3496 |
|
---|
3497 | for (j = 0; j < items[i].code_len; j++) {
|
---|
3498 | len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
|
---|
3499 | if (len < 0) {
|
---|
3500 | r = len;
|
---|
3501 | goto mem_err2;
|
---|
3502 | }
|
---|
3503 |
|
---|
3504 | r = onig_node_str_cat(snode, buf, buf + len);
|
---|
3505 | if (r != 0) goto mem_err2;
|
---|
3506 | }
|
---|
3507 |
|
---|
3508 | an = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
3509 | if (IS_NULL(an)) {
|
---|
3510 | goto mem_err2;
|
---|
3511 | }
|
---|
3512 |
|
---|
3513 | if (items[i].byte_len != slen) {
|
---|
3514 | Node *rem;
|
---|
3515 | UChar *q = p + items[i].byte_len;
|
---|
3516 |
|
---|
3517 | if (q < end) {
|
---|
3518 | r = expand_case_fold_make_rem_string(&rem, q, end, reg);
|
---|
3519 | if (r != 0) {
|
---|
3520 | onig_node_free(an);
|
---|
3521 | goto mem_err2;
|
---|
3522 | }
|
---|
3523 |
|
---|
3524 | xnode = onig_node_list_add(NULL_NODE, snode);
|
---|
3525 | if (IS_NULL(xnode)) {
|
---|
3526 | onig_node_free(an);
|
---|
3527 | onig_node_free(rem);
|
---|
3528 | goto mem_err2;
|
---|
3529 | }
|
---|
3530 | if (IS_NULL(onig_node_list_add(xnode, rem))) {
|
---|
3531 | onig_node_free(an);
|
---|
3532 | onig_node_free(xnode);
|
---|
3533 | onig_node_free(rem);
|
---|
3534 | goto mem_err;
|
---|
3535 | }
|
---|
3536 |
|
---|
3537 | NCAR(an) = xnode;
|
---|
3538 | }
|
---|
3539 | else {
|
---|
3540 | NCAR(an) = snode;
|
---|
3541 | }
|
---|
3542 |
|
---|
3543 | NCDR(var_anode) = an;
|
---|
3544 | var_anode = an;
|
---|
3545 | }
|
---|
3546 | else {
|
---|
3547 | NCAR(an) = snode;
|
---|
3548 | NCDR(anode) = an;
|
---|
3549 | anode = an;
|
---|
3550 | }
|
---|
3551 | }
|
---|
3552 |
|
---|
3553 | return varlen;
|
---|
3554 |
|
---|
3555 | mem_err2:
|
---|
3556 | onig_node_free(snode);
|
---|
3557 |
|
---|
3558 | mem_err:
|
---|
3559 | onig_node_free(*rnode);
|
---|
3560 |
|
---|
3561 | return ONIGERR_MEMORY;
|
---|
3562 | }
|
---|
3563 |
|
---|
3564 | static int
|
---|
3565 | expand_case_fold_string(Node* node, regex_t* reg)
|
---|
3566 | {
|
---|
3567 | #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
|
---|
3568 |
|
---|
3569 | int r, n, len, alt_num;
|
---|
3570 | int varlen = 0;
|
---|
3571 | UChar *start, *end, *p;
|
---|
3572 | Node *top_root, *root, *snode, *prev_node;
|
---|
3573 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
3574 | StrNode* sn = NSTR(node);
|
---|
3575 |
|
---|
3576 | if (NSTRING_IS_AMBIG(node)) return 0;
|
---|
3577 |
|
---|
3578 | start = sn->s;
|
---|
3579 | end = sn->end;
|
---|
3580 | if (start >= end) return 0;
|
---|
3581 |
|
---|
3582 | r = 0;
|
---|
3583 | top_root = root = prev_node = snode = NULL_NODE;
|
---|
3584 | alt_num = 1;
|
---|
3585 | p = start;
|
---|
3586 | while (p < end) {
|
---|
3587 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
|
---|
3588 | p, end, items);
|
---|
3589 | if (n < 0) {
|
---|
3590 | r = n;
|
---|
3591 | goto err;
|
---|
3592 | }
|
---|
3593 |
|
---|
3594 | len = enclen(reg->enc, p);
|
---|
3595 |
|
---|
3596 | varlen = is_case_fold_variable_len(n, items, len);
|
---|
3597 | if (n == 0 || varlen == 0) {
|
---|
3598 | if (IS_NULL(snode)) {
|
---|
3599 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
3600 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
3601 | if (IS_NULL(root)) {
|
---|
3602 | onig_node_free(prev_node);
|
---|
3603 | goto mem_err;
|
---|
3604 | }
|
---|
3605 | }
|
---|
3606 |
|
---|
3607 | prev_node = snode = onig_node_new_str(NULL, NULL);
|
---|
3608 | if (IS_NULL(snode)) goto mem_err;
|
---|
3609 | if (IS_NOT_NULL(root)) {
|
---|
3610 | if (IS_NULL(onig_node_list_add(root, snode))) {
|
---|
3611 | onig_node_free(snode);
|
---|
3612 | goto mem_err;
|
---|
3613 | }
|
---|
3614 | }
|
---|
3615 | }
|
---|
3616 |
|
---|
3617 | r = onig_node_str_cat(snode, p, p + len);
|
---|
3618 | if (r != 0) goto err;
|
---|
3619 | }
|
---|
3620 | else {
|
---|
3621 | alt_num *= (n + 1);
|
---|
3622 | if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
|
---|
3623 |
|
---|
3624 | if (IS_NOT_NULL(snode)) {
|
---|
3625 | r = update_string_node_case_fold(reg, snode);
|
---|
3626 | if (r == 0) {
|
---|
3627 | NSTRING_SET_AMBIG(snode);
|
---|
3628 | }
|
---|
3629 | }
|
---|
3630 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
3631 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
3632 | if (IS_NULL(root)) {
|
---|
3633 | onig_node_free(prev_node);
|
---|
3634 | goto mem_err;
|
---|
3635 | }
|
---|
3636 | }
|
---|
3637 |
|
---|
3638 | r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
|
---|
3639 | if (r < 0) goto mem_err;
|
---|
3640 | if (r == 1) {
|
---|
3641 | if (IS_NULL(root)) {
|
---|
3642 | top_root = prev_node;
|
---|
3643 | }
|
---|
3644 | else {
|
---|
3645 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
3646 | onig_node_free(prev_node);
|
---|
3647 | goto mem_err;
|
---|
3648 | }
|
---|
3649 | }
|
---|
3650 |
|
---|
3651 | root = NCAR(prev_node);
|
---|
3652 | }
|
---|
3653 | else { /* r == 0 */
|
---|
3654 | if (IS_NOT_NULL(root)) {
|
---|
3655 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
3656 | onig_node_free(prev_node);
|
---|
3657 | goto mem_err;
|
---|
3658 | }
|
---|
3659 | }
|
---|
3660 | }
|
---|
3661 |
|
---|
3662 | snode = NULL_NODE;
|
---|
3663 | }
|
---|
3664 |
|
---|
3665 | p += len;
|
---|
3666 | }
|
---|
3667 | if (IS_NOT_NULL(snode)) {
|
---|
3668 | r = update_string_node_case_fold(reg, snode);
|
---|
3669 | if (r == 0) {
|
---|
3670 | NSTRING_SET_AMBIG(snode);
|
---|
3671 | }
|
---|
3672 | }
|
---|
3673 |
|
---|
3674 | if (p < end) {
|
---|
3675 | Node *srem;
|
---|
3676 |
|
---|
3677 | r = expand_case_fold_make_rem_string(&srem, p, end, reg);
|
---|
3678 | if (r != 0) goto mem_err;
|
---|
3679 |
|
---|
3680 | if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
|
---|
3681 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
3682 | if (IS_NULL(root)) {
|
---|
3683 | onig_node_free(srem);
|
---|
3684 | onig_node_free(prev_node);
|
---|
3685 | goto mem_err;
|
---|
3686 | }
|
---|
3687 | }
|
---|
3688 |
|
---|
3689 | if (IS_NULL(root)) {
|
---|
3690 | prev_node = srem;
|
---|
3691 | }
|
---|
3692 | else {
|
---|
3693 | if (IS_NULL(onig_node_list_add(root, srem))) {
|
---|
3694 | onig_node_free(srem);
|
---|
3695 | goto mem_err;
|
---|
3696 | }
|
---|
3697 | }
|
---|
3698 | }
|
---|
3699 |
|
---|
3700 | /* ending */
|
---|
3701 | top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
|
---|
3702 | swap_node(node, top_root);
|
---|
3703 | onig_node_free(top_root);
|
---|
3704 | return 0;
|
---|
3705 |
|
---|
3706 | mem_err:
|
---|
3707 | r = ONIGERR_MEMORY;
|
---|
3708 |
|
---|
3709 | err:
|
---|
3710 | onig_node_free(top_root);
|
---|
3711 | return r;
|
---|
3712 | }
|
---|
3713 |
|
---|
3714 |
|
---|
3715 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
3716 |
|
---|
3717 | #define CEC_THRES_NUM_BIG_REPEAT 512
|
---|
3718 | #define CEC_INFINITE_NUM 0x7fffffff
|
---|
3719 |
|
---|
3720 | #define CEC_IN_INFINITE_REPEAT (1<<0)
|
---|
3721 | #define CEC_IN_FINITE_REPEAT (1<<1)
|
---|
3722 | #define CEC_CONT_BIG_REPEAT (1<<2)
|
---|
3723 |
|
---|
3724 | static int
|
---|
3725 | setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
---|
3726 | {
|
---|
3727 | int type;
|
---|
3728 | int r = state;
|
---|
3729 |
|
---|
3730 | type = NTYPE(node);
|
---|
3731 | switch (type) {
|
---|
3732 | case NT_LIST:
|
---|
3733 | {
|
---|
3734 | Node* prev = NULL_NODE;
|
---|
3735 | do {
|
---|
3736 | r = setup_comb_exp_check(NCAR(node), r, env);
|
---|
3737 | prev = NCAR(node);
|
---|
3738 | } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3739 | }
|
---|
3740 | break;
|
---|
3741 |
|
---|
3742 | case NT_ALT:
|
---|
3743 | {
|
---|
3744 | int ret;
|
---|
3745 | do {
|
---|
3746 | ret = setup_comb_exp_check(NCAR(node), state, env);
|
---|
3747 | r |= ret;
|
---|
3748 | } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3749 | }
|
---|
3750 | break;
|
---|
3751 |
|
---|
3752 | case NT_QTFR:
|
---|
3753 | {
|
---|
3754 | int child_state = state;
|
---|
3755 | int add_state = 0;
|
---|
3756 | QtfrNode* qn = NQTFR(node);
|
---|
3757 | Node* target = qn->target;
|
---|
3758 | int var_num;
|
---|
3759 |
|
---|
3760 | if (! IS_REPEAT_INFINITE(qn->upper)) {
|
---|
3761 | if (qn->upper > 1) {
|
---|
3762 | /* {0,1}, {1,1} are allowed */
|
---|
3763 | child_state |= CEC_IN_FINITE_REPEAT;
|
---|
3764 |
|
---|
3765 | /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
|
---|
3766 | if (env->backrefed_mem == 0) {
|
---|
3767 | if (NTYPE(qn->target) == NT_ENCLOSE) {
|
---|
3768 | EncloseNode* en = NENCLOSE(qn->target);
|
---|
3769 | if (en->type == ENCLOSE_MEMORY) {
|
---|
3770 | if (NTYPE(en->target) == NT_QTFR) {
|
---|
3771 | QtfrNode* q = NQTFR(en->target);
|
---|
3772 | if (IS_REPEAT_INFINITE(q->upper)
|
---|
3773 | && q->greedy == qn->greedy) {
|
---|
3774 | qn->upper = (qn->lower == 0 ? 1 : qn->lower);
|
---|
3775 | if (qn->upper == 1)
|
---|
3776 | child_state = state;
|
---|
3777 | }
|
---|
3778 | }
|
---|
3779 | }
|
---|
3780 | }
|
---|
3781 | }
|
---|
3782 | }
|
---|
3783 | }
|
---|
3784 |
|
---|
3785 | if (state & CEC_IN_FINITE_REPEAT) {
|
---|
3786 | qn->comb_exp_check_num = -1;
|
---|
3787 | }
|
---|
3788 | else {
|
---|
3789 | if (IS_REPEAT_INFINITE(qn->upper)) {
|
---|
3790 | var_num = CEC_INFINITE_NUM;
|
---|
3791 | child_state |= CEC_IN_INFINITE_REPEAT;
|
---|
3792 | }
|
---|
3793 | else {
|
---|
3794 | var_num = qn->upper - qn->lower;
|
---|
3795 | }
|
---|
3796 |
|
---|
3797 | if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
|
---|
3798 | add_state |= CEC_CONT_BIG_REPEAT;
|
---|
3799 |
|
---|
3800 | if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
|
---|
3801 | ((state & CEC_CONT_BIG_REPEAT) != 0 &&
|
---|
3802 | var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
|
---|
3803 | if (qn->comb_exp_check_num == 0) {
|
---|
3804 | env->num_comb_exp_check++;
|
---|
3805 | qn->comb_exp_check_num = env->num_comb_exp_check;
|
---|
3806 | if (env->curr_max_regnum > env->comb_exp_max_regnum)
|
---|
3807 | env->comb_exp_max_regnum = env->curr_max_regnum;
|
---|
3808 | }
|
---|
3809 | }
|
---|
3810 | }
|
---|
3811 |
|
---|
3812 | r = setup_comb_exp_check(target, child_state, env);
|
---|
3813 | r |= add_state;
|
---|
3814 | }
|
---|
3815 | break;
|
---|
3816 |
|
---|
3817 | case NT_ENCLOSE:
|
---|
3818 | {
|
---|
3819 | EncloseNode* en = NENCLOSE(node);
|
---|
3820 |
|
---|
3821 | switch (en->type) {
|
---|
3822 | case ENCLOSE_MEMORY:
|
---|
3823 | {
|
---|
3824 | if (env->curr_max_regnum < en->regnum)
|
---|
3825 | env->curr_max_regnum = en->regnum;
|
---|
3826 |
|
---|
3827 | r = setup_comb_exp_check(en->target, state, env);
|
---|
3828 | }
|
---|
3829 | break;
|
---|
3830 |
|
---|
3831 | default:
|
---|
3832 | r = setup_comb_exp_check(en->target, state, env);
|
---|
3833 | break;
|
---|
3834 | }
|
---|
3835 | }
|
---|
3836 | break;
|
---|
3837 |
|
---|
3838 | #ifdef USE_SUBEXP_CALL
|
---|
3839 | case NT_CALL:
|
---|
3840 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
3841 | env->has_recursion = 1;
|
---|
3842 | else
|
---|
3843 | r = setup_comb_exp_check(NCALL(node)->target, state, env);
|
---|
3844 | break;
|
---|
3845 | #endif
|
---|
3846 |
|
---|
3847 | default:
|
---|
3848 | break;
|
---|
3849 | }
|
---|
3850 |
|
---|
3851 | return r;
|
---|
3852 | }
|
---|
3853 | #endif
|
---|
3854 |
|
---|
3855 | #define IN_ALT (1<<0)
|
---|
3856 | #define IN_NOT (1<<1)
|
---|
3857 | #define IN_REPEAT (1<<2)
|
---|
3858 | #define IN_VAR_REPEAT (1<<3)
|
---|
3859 | #define IN_ROOT (1<<4)
|
---|
3860 |
|
---|
3861 | /* setup_tree does the following work.
|
---|
3862 | 1. check empty loop. (set qn->target_empty_info)
|
---|
3863 | 2. expand ignore-case in char class.
|
---|
3864 | 3. set memory status bit flags. (reg->mem_stats)
|
---|
3865 | 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
|
---|
3866 | 5. find invalid patterns in look-behind.
|
---|
3867 | 6. expand repeated string.
|
---|
3868 | */
|
---|
3869 | static int
|
---|
3870 | setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
---|
3871 | {
|
---|
3872 | int type;
|
---|
3873 | int r = 0;
|
---|
3874 | int in_root = state & IN_ROOT;
|
---|
3875 |
|
---|
3876 | state &= ~IN_ROOT;
|
---|
3877 | restart:
|
---|
3878 | type = NTYPE(node);
|
---|
3879 | switch (type) {
|
---|
3880 | case NT_LIST:
|
---|
3881 | {
|
---|
3882 | Node* prev = NULL_NODE;
|
---|
3883 | int prev_in_root = 0;
|
---|
3884 | state |= in_root;
|
---|
3885 | do {
|
---|
3886 | r = setup_tree(NCAR(node), reg, state, env);
|
---|
3887 | if (IS_NOT_NULL(prev) && r == 0) {
|
---|
3888 | r = next_setup(prev, NCAR(node), prev_in_root, reg);
|
---|
3889 | }
|
---|
3890 | prev = NCAR(node);
|
---|
3891 | prev_in_root = state & IN_ROOT;
|
---|
3892 | state &= ~IN_ROOT;
|
---|
3893 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3894 | }
|
---|
3895 | break;
|
---|
3896 |
|
---|
3897 | case NT_ALT:
|
---|
3898 | do {
|
---|
3899 | r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
|
---|
3900 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
3901 | break;
|
---|
3902 |
|
---|
3903 | case NT_CCLASS:
|
---|
3904 | break;
|
---|
3905 |
|
---|
3906 | case NT_STR:
|
---|
3907 | if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
|
---|
3908 | r = expand_case_fold_string(node, reg);
|
---|
3909 | }
|
---|
3910 | break;
|
---|
3911 |
|
---|
3912 | case NT_CTYPE:
|
---|
3913 | case NT_CANY:
|
---|
3914 | break;
|
---|
3915 |
|
---|
3916 | #ifdef USE_SUBEXP_CALL
|
---|
3917 | case NT_CALL:
|
---|
3918 | break;
|
---|
3919 | #endif
|
---|
3920 |
|
---|
3921 | case NT_BREF:
|
---|
3922 | {
|
---|
3923 | int i;
|
---|
3924 | int* p;
|
---|
3925 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
3926 | BRefNode* br = NBREF(node);
|
---|
3927 | p = BACKREFS_P(br);
|
---|
3928 | for (i = 0; i < br->back_num; i++) {
|
---|
3929 | if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
3930 | BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
|
---|
3931 | BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
|
---|
3932 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
3933 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
3934 | BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
|
---|
3935 | }
|
---|
3936 | #endif
|
---|
3937 | SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
|
---|
3938 | }
|
---|
3939 | }
|
---|
3940 | break;
|
---|
3941 |
|
---|
3942 | case NT_QTFR:
|
---|
3943 | {
|
---|
3944 | OnigDistance d;
|
---|
3945 | QtfrNode* qn = NQTFR(node);
|
---|
3946 | Node* target = qn->target;
|
---|
3947 |
|
---|
3948 | if ((state & IN_REPEAT) != 0) {
|
---|
3949 | qn->state |= NST_IN_REPEAT;
|
---|
3950 | }
|
---|
3951 |
|
---|
3952 | if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
|
---|
3953 | r = get_min_match_length(target, &d, env);
|
---|
3954 | if (r) break;
|
---|
3955 | if (d == 0) {
|
---|
3956 | qn->target_empty_info = NQ_TARGET_IS_EMPTY;
|
---|
3957 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
3958 | r = quantifiers_memory_node_info(target);
|
---|
3959 | if (r < 0) break;
|
---|
3960 | if (r > 0) {
|
---|
3961 | qn->target_empty_info = r;
|
---|
3962 | }
|
---|
3963 | #endif
|
---|
3964 | #if 0
|
---|
3965 | r = get_max_match_length(target, &d, env);
|
---|
3966 | if (r == 0 && d == 0) {
|
---|
3967 | /* ()* ==> ()?, ()+ ==> () */
|
---|
3968 | qn->upper = 1;
|
---|
3969 | if (qn->lower > 1) qn->lower = 1;
|
---|
3970 | if (NTYPE(target) == NT_STR) {
|
---|
3971 | qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
|
---|
3972 | }
|
---|
3973 | }
|
---|
3974 | #endif
|
---|
3975 | }
|
---|
3976 | }
|
---|
3977 |
|
---|
3978 | state |= IN_REPEAT;
|
---|
3979 | if (qn->lower != qn->upper)
|
---|
3980 | state |= IN_VAR_REPEAT;
|
---|
3981 | r = setup_tree(target, reg, state, env);
|
---|
3982 | if (r) break;
|
---|
3983 |
|
---|
3984 | /* expand string */
|
---|
3985 | #define EXPAND_STRING_MAX_LENGTH 100
|
---|
3986 | if (NTYPE(target) == NT_STR) {
|
---|
3987 | if (qn->lower > 1) {
|
---|
3988 | int i, n = qn->lower;
|
---|
3989 | OnigDistance len = NSTRING_LEN(target);
|
---|
3990 | StrNode* sn = NSTR(target);
|
---|
3991 | Node* np;
|
---|
3992 |
|
---|
3993 | np = onig_node_new_str(sn->s, sn->end);
|
---|
3994 | if (IS_NULL(np)) return ONIGERR_MEMORY;
|
---|
3995 | NSTR(np)->flag = sn->flag;
|
---|
3996 |
|
---|
3997 | for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
|
---|
3998 | r = onig_node_str_cat(np, sn->s, sn->end);
|
---|
3999 | if (r) {
|
---|
4000 | onig_node_free(np);
|
---|
4001 | return r;
|
---|
4002 | }
|
---|
4003 | }
|
---|
4004 | if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
|
---|
4005 | Node *np1, *np2;
|
---|
4006 |
|
---|
4007 | qn->lower -= i;
|
---|
4008 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
4009 | qn->upper -= i;
|
---|
4010 |
|
---|
4011 | np1 = onig_node_new_list(np, NULL);
|
---|
4012 | if (IS_NULL(np1)) {
|
---|
4013 | onig_node_free(np);
|
---|
4014 | return ONIGERR_MEMORY;
|
---|
4015 | }
|
---|
4016 | swap_node(np1, node);
|
---|
4017 | np2 = onig_node_list_add(node, np1);
|
---|
4018 | if (IS_NULL(np2)) {
|
---|
4019 | onig_node_free(np1);
|
---|
4020 | return ONIGERR_MEMORY;
|
---|
4021 | }
|
---|
4022 | }
|
---|
4023 | else {
|
---|
4024 | swap_node(np, node);
|
---|
4025 | onig_node_free(np);
|
---|
4026 | }
|
---|
4027 | break; /* break case NT_QTFR: */
|
---|
4028 | }
|
---|
4029 | }
|
---|
4030 |
|
---|
4031 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
4032 | if (qn->greedy && (qn->target_empty_info != 0)) {
|
---|
4033 | if (NTYPE(target) == NT_QTFR) {
|
---|
4034 | QtfrNode* tqn = NQTFR(target);
|
---|
4035 | if (IS_NOT_NULL(tqn->head_exact)) {
|
---|
4036 | qn->head_exact = tqn->head_exact;
|
---|
4037 | tqn->head_exact = NULL;
|
---|
4038 | }
|
---|
4039 | }
|
---|
4040 | else {
|
---|
4041 | qn->head_exact = get_head_value_node(qn->target, 1, reg);
|
---|
4042 | }
|
---|
4043 | }
|
---|
4044 | #endif
|
---|
4045 | }
|
---|
4046 | break;
|
---|
4047 |
|
---|
4048 | case NT_ENCLOSE:
|
---|
4049 | {
|
---|
4050 | EncloseNode* en = NENCLOSE(node);
|
---|
4051 |
|
---|
4052 | switch (en->type) {
|
---|
4053 | case ENCLOSE_OPTION:
|
---|
4054 | {
|
---|
4055 | OnigOptionType options = reg->options;
|
---|
4056 | state |= in_root;
|
---|
4057 | reg->options = NENCLOSE(node)->option;
|
---|
4058 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
4059 | reg->options = options;
|
---|
4060 | }
|
---|
4061 | break;
|
---|
4062 |
|
---|
4063 | case ENCLOSE_MEMORY:
|
---|
4064 | if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
|
---|
4065 | BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
|
---|
4066 | /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
|
---|
4067 | }
|
---|
4068 | r = setup_tree(en->target, reg, state, env);
|
---|
4069 | break;
|
---|
4070 |
|
---|
4071 | case ENCLOSE_STOP_BACKTRACK:
|
---|
4072 | {
|
---|
4073 | Node* target = en->target;
|
---|
4074 | r = setup_tree(target, reg, state, env);
|
---|
4075 | if (NTYPE(target) == NT_QTFR) {
|
---|
4076 | QtfrNode* tqn = NQTFR(target);
|
---|
4077 | if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
|
---|
4078 | tqn->greedy != 0) { /* (?>a*), a*+ etc... */
|
---|
4079 | int qtype = NTYPE(tqn->target);
|
---|
4080 | if (IS_NODE_TYPE_SIMPLE(qtype))
|
---|
4081 | SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
|
---|
4082 | }
|
---|
4083 | }
|
---|
4084 | }
|
---|
4085 | break;
|
---|
4086 |
|
---|
4087 | case ENCLOSE_CONDITION:
|
---|
4088 | #ifdef USE_NAMED_GROUP
|
---|
4089 | if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
|
---|
4090 | env->num_named > 0 &&
|
---|
4091 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
4092 | !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
4093 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
4094 | }
|
---|
4095 | #endif
|
---|
4096 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
4097 | break;
|
---|
4098 | }
|
---|
4099 | }
|
---|
4100 | break;
|
---|
4101 |
|
---|
4102 | case NT_ANCHOR:
|
---|
4103 | {
|
---|
4104 | AnchorNode* an = NANCHOR(node);
|
---|
4105 |
|
---|
4106 | switch (an->type) {
|
---|
4107 | case ANCHOR_PREC_READ:
|
---|
4108 | r = setup_tree(an->target, reg, state, env);
|
---|
4109 | break;
|
---|
4110 | case ANCHOR_PREC_READ_NOT:
|
---|
4111 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
4112 | break;
|
---|
4113 |
|
---|
4114 | /* allowed node types in look-behind */
|
---|
4115 | #define ALLOWED_TYPE_IN_LB \
|
---|
4116 | ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
|
---|
4117 | BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
|
---|
4118 |
|
---|
4119 | #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
|
---|
4120 | #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
|
---|
4121 |
|
---|
4122 | #define ALLOWED_ANCHOR_IN_LB \
|
---|
4123 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
4124 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
4125 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
4126 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
4127 | #define ALLOWED_ANCHOR_IN_LB_NOT \
|
---|
4128 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
4129 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
4130 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
4131 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
4132 |
|
---|
4133 | case ANCHOR_LOOK_BEHIND:
|
---|
4134 | {
|
---|
4135 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
4136 | ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
|
---|
4137 | if (r < 0) return r;
|
---|
4138 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
4139 | r = setup_look_behind(node, reg, env);
|
---|
4140 | if (r != 0) return r;
|
---|
4141 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
4142 | r = setup_tree(an->target, reg, state, env);
|
---|
4143 | }
|
---|
4144 | break;
|
---|
4145 |
|
---|
4146 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
4147 | {
|
---|
4148 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
4149 | ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
|
---|
4150 | if (r < 0) return r;
|
---|
4151 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
4152 | r = setup_look_behind(node, reg, env);
|
---|
4153 | if (r != 0) return r;
|
---|
4154 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
4155 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
4156 | }
|
---|
4157 | break;
|
---|
4158 | }
|
---|
4159 | }
|
---|
4160 | break;
|
---|
4161 |
|
---|
4162 | default:
|
---|
4163 | break;
|
---|
4164 | }
|
---|
4165 |
|
---|
4166 | return r;
|
---|
4167 | }
|
---|
4168 |
|
---|
4169 | #ifndef USE_SUNDAY_QUICK_SEARCH
|
---|
4170 | /* set skip map for Boyer-Moore search */
|
---|
4171 | static int
|
---|
4172 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
4173 | UChar skip[], int** int_skip, int ignore_case)
|
---|
4174 | {
|
---|
4175 | OnigDistance i, len;
|
---|
4176 | int clen, flen, n, j, k;
|
---|
4177 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
4178 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
4179 | OnigEncoding enc = reg->enc;
|
---|
4180 |
|
---|
4181 | len = end - s;
|
---|
4182 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
4183 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
|
---|
4184 |
|
---|
4185 | n = 0;
|
---|
4186 | for (i = 0; i < len - 1; i += clen) {
|
---|
4187 | p = s + i;
|
---|
4188 | if (ignore_case)
|
---|
4189 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4190 | p, end, items);
|
---|
4191 | clen = enclen(enc, p);
|
---|
4192 |
|
---|
4193 | for (j = 0; j < n; j++) {
|
---|
4194 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4195 | return 1; /* different length isn't supported. */
|
---|
4196 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4197 | if (flen != clen)
|
---|
4198 | return 1; /* different length isn't supported. */
|
---|
4199 | }
|
---|
4200 | for (j = 0; j < clen; j++) {
|
---|
4201 | skip[s[i + j]] = (UChar )(len - 1 - i - j);
|
---|
4202 | for (k = 0; k < n; k++) {
|
---|
4203 | skip[buf[k][j]] = (UChar )(len - 1 - i - j);
|
---|
4204 | }
|
---|
4205 | }
|
---|
4206 | }
|
---|
4207 | }
|
---|
4208 | else {
|
---|
4209 | if (IS_NULL(*int_skip)) {
|
---|
4210 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
4211 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
4212 | }
|
---|
4213 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
|
---|
4214 |
|
---|
4215 | n = 0;
|
---|
4216 | for (i = 0; i < len - 1; i += clen) {
|
---|
4217 | p = s + i;
|
---|
4218 | if (ignore_case)
|
---|
4219 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4220 | p, end, items);
|
---|
4221 | clen = enclen(enc, p);
|
---|
4222 |
|
---|
4223 | for (j = 0; j < n; j++) {
|
---|
4224 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4225 | return 1; /* different length isn't supported. */
|
---|
4226 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4227 | if (flen != clen)
|
---|
4228 | return 1; /* different length isn't supported. */
|
---|
4229 | }
|
---|
4230 | for (j = 0; j < clen; j++) {
|
---|
4231 | (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
|
---|
4232 | for (k = 0; k < n; k++) {
|
---|
4233 | (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
|
---|
4234 | }
|
---|
4235 | }
|
---|
4236 | }
|
---|
4237 | }
|
---|
4238 | return 0;
|
---|
4239 | }
|
---|
4240 |
|
---|
4241 | #else /* USE_SUNDAY_QUICK_SEARCH */
|
---|
4242 |
|
---|
4243 | /* set skip map for Sunday's quick search */
|
---|
4244 | static int
|
---|
4245 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
4246 | UChar skip[], int** int_skip, int ignore_case)
|
---|
4247 | {
|
---|
4248 | OnigDistance i, len;
|
---|
4249 | int clen, flen, n, j, k;
|
---|
4250 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
4251 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
4252 | OnigEncoding enc = reg->enc;
|
---|
4253 |
|
---|
4254 | len = end - s;
|
---|
4255 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
4256 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
|
---|
4257 |
|
---|
4258 | n = 0;
|
---|
4259 | for (i = 0; i < len; i += clen) {
|
---|
4260 | p = s + i;
|
---|
4261 | if (ignore_case)
|
---|
4262 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4263 | p, end, items);
|
---|
4264 | clen = enclen(enc, p);
|
---|
4265 |
|
---|
4266 | for (j = 0; j < n; j++) {
|
---|
4267 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4268 | return 1; /* different length isn't supported. */
|
---|
4269 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4270 | if (flen != clen)
|
---|
4271 | return 1; /* different length isn't supported. */
|
---|
4272 | }
|
---|
4273 | for (j = 0; j < clen; j++) {
|
---|
4274 | skip[s[i + j]] = (UChar )(len - i - j);
|
---|
4275 | for (k = 0; k < n; k++) {
|
---|
4276 | skip[buf[k][j]] = (UChar )(len - i - j);
|
---|
4277 | }
|
---|
4278 | }
|
---|
4279 | }
|
---|
4280 | }
|
---|
4281 | else {
|
---|
4282 | if (IS_NULL(*int_skip)) {
|
---|
4283 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
4284 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
4285 | }
|
---|
4286 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
|
---|
4287 |
|
---|
4288 | n = 0;
|
---|
4289 | for (i = 0; i < len; i += clen) {
|
---|
4290 | p = s + i;
|
---|
4291 | if (ignore_case)
|
---|
4292 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
4293 | p, end, items);
|
---|
4294 | clen = enclen(enc, p);
|
---|
4295 |
|
---|
4296 | for (j = 0; j < n; j++) {
|
---|
4297 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
4298 | return 1; /* different length isn't supported. */
|
---|
4299 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
4300 | if (flen != clen)
|
---|
4301 | return 1; /* different length isn't supported. */
|
---|
4302 | }
|
---|
4303 | for (j = 0; j < clen; j++) {
|
---|
4304 | (*int_skip)[s[i + j]] = (int )(len - i - j);
|
---|
4305 | for (k = 0; k < n; k++) {
|
---|
4306 | (*int_skip)[buf[k][j]] = (int )(len - i - j);
|
---|
4307 | }
|
---|
4308 | }
|
---|
4309 | }
|
---|
4310 | }
|
---|
4311 | return 0;
|
---|
4312 | }
|
---|
4313 | #endif /* USE_SUNDAY_QUICK_SEARCH */
|
---|
4314 |
|
---|
4315 | #define OPT_EXACT_MAXLEN 24
|
---|
4316 |
|
---|
4317 | typedef struct {
|
---|
4318 | OnigDistance min; /* min byte length */
|
---|
4319 | OnigDistance max; /* max byte length */
|
---|
4320 | } MinMaxLen;
|
---|
4321 |
|
---|
4322 | typedef struct {
|
---|
4323 | MinMaxLen mmd;
|
---|
4324 | OnigEncoding enc;
|
---|
4325 | OnigOptionType options;
|
---|
4326 | OnigCaseFoldType case_fold_flag;
|
---|
4327 | ScanEnv* scan_env;
|
---|
4328 | } OptEnv;
|
---|
4329 |
|
---|
4330 | typedef struct {
|
---|
4331 | int left_anchor;
|
---|
4332 | int right_anchor;
|
---|
4333 | } OptAncInfo;
|
---|
4334 |
|
---|
4335 | typedef struct {
|
---|
4336 | MinMaxLen mmd; /* info position */
|
---|
4337 | OptAncInfo anc;
|
---|
4338 |
|
---|
4339 | int reach_end;
|
---|
4340 | int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
|
---|
4341 | int len;
|
---|
4342 | UChar s[OPT_EXACT_MAXLEN];
|
---|
4343 | } OptExactInfo;
|
---|
4344 |
|
---|
4345 | typedef struct {
|
---|
4346 | MinMaxLen mmd; /* info position */
|
---|
4347 | OptAncInfo anc;
|
---|
4348 |
|
---|
4349 | int value; /* weighted value */
|
---|
4350 | UChar map[ONIG_CHAR_TABLE_SIZE];
|
---|
4351 | } OptMapInfo;
|
---|
4352 |
|
---|
4353 | typedef struct {
|
---|
4354 | MinMaxLen len;
|
---|
4355 |
|
---|
4356 | OptAncInfo anc;
|
---|
4357 | OptExactInfo exb; /* boundary */
|
---|
4358 | OptExactInfo exm; /* middle */
|
---|
4359 | OptExactInfo expr; /* prec read (?=...) */
|
---|
4360 |
|
---|
4361 | OptMapInfo map; /* boundary */
|
---|
4362 | } NodeOptInfo;
|
---|
4363 |
|
---|
4364 |
|
---|
4365 | static int
|
---|
4366 | map_position_value(OnigEncoding enc, int i)
|
---|
4367 | {
|
---|
4368 | static const short int ByteValTable[] = {
|
---|
4369 | 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
|
---|
4370 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
---|
4371 | 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
|
---|
4372 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
|
---|
4373 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
4374 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
|
---|
4375 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
4376 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
---|
4377 | };
|
---|
4378 |
|
---|
4379 | if (i < numberof(ByteValTable)) {
|
---|
4380 | if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
|
---|
4381 | return 20;
|
---|
4382 | else
|
---|
4383 | return (int )ByteValTable[i];
|
---|
4384 | }
|
---|
4385 | else
|
---|
4386 | return 4; /* Take it easy. */
|
---|
4387 | }
|
---|
4388 |
|
---|
4389 | static int
|
---|
4390 | distance_value(MinMaxLen* mm)
|
---|
4391 | {
|
---|
4392 | /* 1000 / (min-max-dist + 1) */
|
---|
4393 | static const short int dist_vals[] = {
|
---|
4394 | 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
|
---|
4395 | 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
|
---|
4396 | 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
|
---|
4397 | 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
|
---|
4398 | 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
|
---|
4399 | 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
|
---|
4400 | 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
|
---|
4401 | 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
|
---|
4402 | 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
|
---|
4403 | 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
|
---|
4404 | };
|
---|
4405 |
|
---|
4406 | OnigDistance d;
|
---|
4407 |
|
---|
4408 | if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
|
---|
4409 |
|
---|
4410 | d = mm->max - mm->min;
|
---|
4411 | if (d < numberof(dist_vals))
|
---|
4412 | /* return dist_vals[d] * 16 / (mm->min + 12); */
|
---|
4413 | return (int )dist_vals[d];
|
---|
4414 | else
|
---|
4415 | return 1;
|
---|
4416 | }
|
---|
4417 |
|
---|
4418 | static int
|
---|
4419 | comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
|
---|
4420 | {
|
---|
4421 | if (v2 <= 0) return -1;
|
---|
4422 | if (v1 <= 0) return 1;
|
---|
4423 |
|
---|
4424 | v1 *= distance_value(d1);
|
---|
4425 | v2 *= distance_value(d2);
|
---|
4426 |
|
---|
4427 | if (v2 > v1) return 1;
|
---|
4428 | if (v2 < v1) return -1;
|
---|
4429 |
|
---|
4430 | if (d2->min < d1->min) return 1;
|
---|
4431 | if (d2->min > d1->min) return -1;
|
---|
4432 | return 0;
|
---|
4433 | }
|
---|
4434 |
|
---|
4435 | static int
|
---|
4436 | is_equal_mml(MinMaxLen* a, MinMaxLen* b)
|
---|
4437 | {
|
---|
4438 | return (a->min == b->min && a->max == b->max) ? 1 : 0;
|
---|
4439 | }
|
---|
4440 |
|
---|
4441 |
|
---|
4442 | static void
|
---|
4443 | set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
|
---|
4444 | {
|
---|
4445 | mml->min = min;
|
---|
4446 | mml->max = max;
|
---|
4447 | }
|
---|
4448 |
|
---|
4449 | static void
|
---|
4450 | clear_mml(MinMaxLen* mml)
|
---|
4451 | {
|
---|
4452 | mml->min = mml->max = 0;
|
---|
4453 | }
|
---|
4454 |
|
---|
4455 | static void
|
---|
4456 | copy_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
4457 | {
|
---|
4458 | to->min = from->min;
|
---|
4459 | to->max = from->max;
|
---|
4460 | }
|
---|
4461 |
|
---|
4462 | static void
|
---|
4463 | add_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
4464 | {
|
---|
4465 | to->min = distance_add(to->min, from->min);
|
---|
4466 | to->max = distance_add(to->max, from->max);
|
---|
4467 | }
|
---|
4468 |
|
---|
4469 | #if 0
|
---|
4470 | static void
|
---|
4471 | add_len_mml(MinMaxLen* to, OnigDistance len)
|
---|
4472 | {
|
---|
4473 | to->min = distance_add(to->min, len);
|
---|
4474 | to->max = distance_add(to->max, len);
|
---|
4475 | }
|
---|
4476 | #endif
|
---|
4477 |
|
---|
4478 | static void
|
---|
4479 | alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
4480 | {
|
---|
4481 | if (to->min > from->min) to->min = from->min;
|
---|
4482 | if (to->max < from->max) to->max = from->max;
|
---|
4483 | }
|
---|
4484 |
|
---|
4485 | static void
|
---|
4486 | copy_opt_env(OptEnv* to, OptEnv* from)
|
---|
4487 | {
|
---|
4488 | *to = *from;
|
---|
4489 | }
|
---|
4490 |
|
---|
4491 | static void
|
---|
4492 | clear_opt_anc_info(OptAncInfo* anc)
|
---|
4493 | {
|
---|
4494 | anc->left_anchor = 0;
|
---|
4495 | anc->right_anchor = 0;
|
---|
4496 | }
|
---|
4497 |
|
---|
4498 | static void
|
---|
4499 | copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
|
---|
4500 | {
|
---|
4501 | *to = *from;
|
---|
4502 | }
|
---|
4503 |
|
---|
4504 | static void
|
---|
4505 | concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
|
---|
4506 | OnigDistance left_len, OnigDistance right_len)
|
---|
4507 | {
|
---|
4508 | clear_opt_anc_info(to);
|
---|
4509 |
|
---|
4510 | to->left_anchor = left->left_anchor;
|
---|
4511 | if (left_len == 0) {
|
---|
4512 | to->left_anchor |= right->left_anchor;
|
---|
4513 | }
|
---|
4514 |
|
---|
4515 | to->right_anchor = right->right_anchor;
|
---|
4516 | if (right_len == 0) {
|
---|
4517 | to->right_anchor |= left->right_anchor;
|
---|
4518 | }
|
---|
4519 | else {
|
---|
4520 | to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
|
---|
4521 | }
|
---|
4522 | }
|
---|
4523 |
|
---|
4524 | static int
|
---|
4525 | is_left_anchor(int anc)
|
---|
4526 | {
|
---|
4527 | if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
|
---|
4528 | anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
|
---|
4529 | anc == ANCHOR_PREC_READ_NOT)
|
---|
4530 | return 0;
|
---|
4531 |
|
---|
4532 | return 1;
|
---|
4533 | }
|
---|
4534 |
|
---|
4535 | static int
|
---|
4536 | is_set_opt_anc_info(OptAncInfo* to, int anc)
|
---|
4537 | {
|
---|
4538 | if ((to->left_anchor & anc) != 0) return 1;
|
---|
4539 |
|
---|
4540 | return ((to->right_anchor & anc) != 0 ? 1 : 0);
|
---|
4541 | }
|
---|
4542 |
|
---|
4543 | static void
|
---|
4544 | add_opt_anc_info(OptAncInfo* to, int anc)
|
---|
4545 | {
|
---|
4546 | if (is_left_anchor(anc))
|
---|
4547 | to->left_anchor |= anc;
|
---|
4548 | else
|
---|
4549 | to->right_anchor |= anc;
|
---|
4550 | }
|
---|
4551 |
|
---|
4552 | static void
|
---|
4553 | remove_opt_anc_info(OptAncInfo* to, int anc)
|
---|
4554 | {
|
---|
4555 | if (is_left_anchor(anc))
|
---|
4556 | to->left_anchor &= ~anc;
|
---|
4557 | else
|
---|
4558 | to->right_anchor &= ~anc;
|
---|
4559 | }
|
---|
4560 |
|
---|
4561 | static void
|
---|
4562 | alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
|
---|
4563 | {
|
---|
4564 | to->left_anchor &= add->left_anchor;
|
---|
4565 | to->right_anchor &= add->right_anchor;
|
---|
4566 | }
|
---|
4567 |
|
---|
4568 | static int
|
---|
4569 | is_full_opt_exact_info(OptExactInfo* ex)
|
---|
4570 | {
|
---|
4571 | return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
|
---|
4572 | }
|
---|
4573 |
|
---|
4574 | static void
|
---|
4575 | clear_opt_exact_info(OptExactInfo* ex)
|
---|
4576 | {
|
---|
4577 | clear_mml(&ex->mmd);
|
---|
4578 | clear_opt_anc_info(&ex->anc);
|
---|
4579 | ex->reach_end = 0;
|
---|
4580 | ex->ignore_case = -1; /* unset */
|
---|
4581 | ex->len = 0;
|
---|
4582 | ex->s[0] = '\0';
|
---|
4583 | }
|
---|
4584 |
|
---|
4585 | static void
|
---|
4586 | copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
|
---|
4587 | {
|
---|
4588 | *to = *from;
|
---|
4589 | }
|
---|
4590 |
|
---|
4591 | static void
|
---|
4592 | concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
|
---|
4593 | {
|
---|
4594 | int i, j, len;
|
---|
4595 | UChar *p, *end;
|
---|
4596 | OptAncInfo tanc;
|
---|
4597 |
|
---|
4598 | if (to->ignore_case < 0)
|
---|
4599 | to->ignore_case = add->ignore_case;
|
---|
4600 | else if (to->ignore_case != add->ignore_case)
|
---|
4601 | return ; /* avoid */
|
---|
4602 |
|
---|
4603 | p = add->s;
|
---|
4604 | end = p + add->len;
|
---|
4605 | for (i = to->len; p < end; ) {
|
---|
4606 | len = enclen(enc, p);
|
---|
4607 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
4608 | for (j = 0; j < len && p < end; j++)
|
---|
4609 | to->s[i++] = *p++;
|
---|
4610 | }
|
---|
4611 |
|
---|
4612 | to->len = i;
|
---|
4613 | to->reach_end = (p == end ? add->reach_end : 0);
|
---|
4614 |
|
---|
4615 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
|
---|
4616 | if (! to->reach_end) tanc.right_anchor = 0;
|
---|
4617 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
4618 | }
|
---|
4619 |
|
---|
4620 | static void
|
---|
4621 | concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
|
---|
4622 | int raw ARG_UNUSED, OnigEncoding enc)
|
---|
4623 | {
|
---|
4624 | int i, j, len;
|
---|
4625 | UChar *p;
|
---|
4626 |
|
---|
4627 | for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
|
---|
4628 | len = enclen(enc, p);
|
---|
4629 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
4630 | for (j = 0; j < len && p < end; j++)
|
---|
4631 | to->s[i++] = *p++;
|
---|
4632 | }
|
---|
4633 |
|
---|
4634 | to->len = i;
|
---|
4635 | }
|
---|
4636 |
|
---|
4637 | static void
|
---|
4638 | alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
|
---|
4639 | {
|
---|
4640 | int i, j, len;
|
---|
4641 |
|
---|
4642 | if (add->len == 0 || to->len == 0) {
|
---|
4643 | clear_opt_exact_info(to);
|
---|
4644 | return ;
|
---|
4645 | }
|
---|
4646 |
|
---|
4647 | if (! is_equal_mml(&to->mmd, &add->mmd)) {
|
---|
4648 | clear_opt_exact_info(to);
|
---|
4649 | return ;
|
---|
4650 | }
|
---|
4651 |
|
---|
4652 | for (i = 0; i < to->len && i < add->len; ) {
|
---|
4653 | if (to->s[i] != add->s[i]) break;
|
---|
4654 | len = enclen(env->enc, to->s + i);
|
---|
4655 |
|
---|
4656 | for (j = 1; j < len; j++) {
|
---|
4657 | if (to->s[i+j] != add->s[i+j]) break;
|
---|
4658 | }
|
---|
4659 | if (j < len) break;
|
---|
4660 | i += len;
|
---|
4661 | }
|
---|
4662 |
|
---|
4663 | if (! add->reach_end || i < add->len || i < to->len) {
|
---|
4664 | to->reach_end = 0;
|
---|
4665 | }
|
---|
4666 | to->len = i;
|
---|
4667 | if (to->ignore_case < 0)
|
---|
4668 | to->ignore_case = add->ignore_case;
|
---|
4669 | else if (add->ignore_case >= 0)
|
---|
4670 | to->ignore_case |= add->ignore_case;
|
---|
4671 |
|
---|
4672 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
4673 | if (! to->reach_end) to->anc.right_anchor = 0;
|
---|
4674 | }
|
---|
4675 |
|
---|
4676 | static void
|
---|
4677 | select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
|
---|
4678 | {
|
---|
4679 | int v1, v2;
|
---|
4680 |
|
---|
4681 | v1 = now->len;
|
---|
4682 | v2 = alt->len;
|
---|
4683 |
|
---|
4684 | if (v2 == 0) {
|
---|
4685 | return ;
|
---|
4686 | }
|
---|
4687 | else if (v1 == 0) {
|
---|
4688 | copy_opt_exact_info(now, alt);
|
---|
4689 | return ;
|
---|
4690 | }
|
---|
4691 | else if (v1 <= 2 && v2 <= 2) {
|
---|
4692 | /* ByteValTable[x] is big value --> low price */
|
---|
4693 | v2 = map_position_value(enc, now->s[0]);
|
---|
4694 | v1 = map_position_value(enc, alt->s[0]);
|
---|
4695 |
|
---|
4696 | if (now->len > 1) v1 += 5;
|
---|
4697 | if (alt->len > 1) v2 += 5;
|
---|
4698 | }
|
---|
4699 |
|
---|
4700 | if (now->ignore_case <= 0) v1 *= 2;
|
---|
4701 | if (alt->ignore_case <= 0) v2 *= 2;
|
---|
4702 |
|
---|
4703 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
4704 | copy_opt_exact_info(now, alt);
|
---|
4705 | }
|
---|
4706 |
|
---|
4707 | static void
|
---|
4708 | clear_opt_map_info(OptMapInfo* map)
|
---|
4709 | {
|
---|
4710 | static const OptMapInfo clean_info = {
|
---|
4711 | {0, 0}, {0, 0}, 0,
|
---|
4712 | {
|
---|
4713 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4714 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4715 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4716 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4717 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4718 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4719 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4720 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4721 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4722 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4723 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4724 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4725 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4726 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4727 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
4728 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
---|
4729 | }
|
---|
4730 | };
|
---|
4731 |
|
---|
4732 | xmemcpy(map, &clean_info, sizeof(OptMapInfo));
|
---|
4733 | }
|
---|
4734 |
|
---|
4735 | static void
|
---|
4736 | copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
|
---|
4737 | {
|
---|
4738 | *to = *from;
|
---|
4739 | }
|
---|
4740 |
|
---|
4741 | static void
|
---|
4742 | add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
|
---|
4743 | {
|
---|
4744 | if (map->map[c] == 0) {
|
---|
4745 | map->map[c] = 1;
|
---|
4746 | map->value += map_position_value(enc, c);
|
---|
4747 | }
|
---|
4748 | }
|
---|
4749 |
|
---|
4750 | static int
|
---|
4751 | add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
---|
4752 | OnigEncoding enc, OnigCaseFoldType case_fold_flag)
|
---|
4753 | {
|
---|
4754 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
4755 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
---|
4756 | int i, n;
|
---|
4757 |
|
---|
4758 | add_char_opt_map_info(map, p[0], enc);
|
---|
4759 |
|
---|
4760 | case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
|
---|
4761 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
|
---|
4762 | if (n < 0) return n;
|
---|
4763 |
|
---|
4764 | for (i = 0; i < n; i++) {
|
---|
4765 | ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
|
---|
4766 | add_char_opt_map_info(map, buf[0], enc);
|
---|
4767 | }
|
---|
4768 |
|
---|
4769 | return 0;
|
---|
4770 | }
|
---|
4771 |
|
---|
4772 | static void
|
---|
4773 | select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
|
---|
4774 | {
|
---|
4775 | const int z = 1<<15; /* 32768: something big value */
|
---|
4776 |
|
---|
4777 | int v1, v2;
|
---|
4778 |
|
---|
4779 | if (alt->value == 0) return ;
|
---|
4780 | if (now->value == 0) {
|
---|
4781 | copy_opt_map_info(now, alt);
|
---|
4782 | return ;
|
---|
4783 | }
|
---|
4784 |
|
---|
4785 | v1 = z / now->value;
|
---|
4786 | v2 = z / alt->value;
|
---|
4787 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
4788 | copy_opt_map_info(now, alt);
|
---|
4789 | }
|
---|
4790 |
|
---|
4791 | static int
|
---|
4792 | comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
|
---|
4793 | {
|
---|
4794 | #define COMP_EM_BASE 20
|
---|
4795 | int ve, vm;
|
---|
4796 |
|
---|
4797 | if (m->value <= 0) return -1;
|
---|
4798 |
|
---|
4799 | ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
|
---|
4800 | vm = COMP_EM_BASE * 5 * 2 / m->value;
|
---|
4801 | return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
|
---|
4802 | }
|
---|
4803 |
|
---|
4804 | static void
|
---|
4805 | alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
|
---|
4806 | {
|
---|
4807 | int i, val;
|
---|
4808 |
|
---|
4809 | /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
|
---|
4810 | if (to->value == 0) return ;
|
---|
4811 | if (add->value == 0 || to->mmd.max < add->mmd.min) {
|
---|
4812 | clear_opt_map_info(to);
|
---|
4813 | return ;
|
---|
4814 | }
|
---|
4815 |
|
---|
4816 | alt_merge_mml(&to->mmd, &add->mmd);
|
---|
4817 |
|
---|
4818 | val = 0;
|
---|
4819 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
4820 | if (add->map[i])
|
---|
4821 | to->map[i] = 1;
|
---|
4822 |
|
---|
4823 | if (to->map[i])
|
---|
4824 | val += map_position_value(enc, i);
|
---|
4825 | }
|
---|
4826 | to->value = val;
|
---|
4827 |
|
---|
4828 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
4829 | }
|
---|
4830 |
|
---|
4831 | static void
|
---|
4832 | set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
|
---|
4833 | {
|
---|
4834 | copy_mml(&(opt->exb.mmd), mmd);
|
---|
4835 | copy_mml(&(opt->expr.mmd), mmd);
|
---|
4836 | copy_mml(&(opt->map.mmd), mmd);
|
---|
4837 | }
|
---|
4838 |
|
---|
4839 | static void
|
---|
4840 | clear_node_opt_info(NodeOptInfo* opt)
|
---|
4841 | {
|
---|
4842 | clear_mml(&opt->len);
|
---|
4843 | clear_opt_anc_info(&opt->anc);
|
---|
4844 | clear_opt_exact_info(&opt->exb);
|
---|
4845 | clear_opt_exact_info(&opt->exm);
|
---|
4846 | clear_opt_exact_info(&opt->expr);
|
---|
4847 | clear_opt_map_info(&opt->map);
|
---|
4848 | }
|
---|
4849 |
|
---|
4850 | static void
|
---|
4851 | copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
|
---|
4852 | {
|
---|
4853 | *to = *from;
|
---|
4854 | }
|
---|
4855 |
|
---|
4856 | static void
|
---|
4857 | concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
|
---|
4858 | {
|
---|
4859 | int exb_reach, exm_reach;
|
---|
4860 | OptAncInfo tanc;
|
---|
4861 |
|
---|
4862 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
|
---|
4863 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
4864 |
|
---|
4865 | if (add->exb.len > 0 && to->len.max == 0) {
|
---|
4866 | concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
|
---|
4867 | to->len.max, add->len.max);
|
---|
4868 | copy_opt_anc_info(&add->exb.anc, &tanc);
|
---|
4869 | }
|
---|
4870 |
|
---|
4871 | if (add->map.value > 0 && to->len.max == 0) {
|
---|
4872 | if (add->map.mmd.max == 0)
|
---|
4873 | add->map.anc.left_anchor |= to->anc.left_anchor;
|
---|
4874 | }
|
---|
4875 |
|
---|
4876 | exb_reach = to->exb.reach_end;
|
---|
4877 | exm_reach = to->exm.reach_end;
|
---|
4878 |
|
---|
4879 | if (add->len.max != 0)
|
---|
4880 | to->exb.reach_end = to->exm.reach_end = 0;
|
---|
4881 |
|
---|
4882 | if (add->exb.len > 0) {
|
---|
4883 | if (exb_reach) {
|
---|
4884 | concat_opt_exact_info(&to->exb, &add->exb, enc);
|
---|
4885 | clear_opt_exact_info(&add->exb);
|
---|
4886 | }
|
---|
4887 | else if (exm_reach) {
|
---|
4888 | concat_opt_exact_info(&to->exm, &add->exb, enc);
|
---|
4889 | clear_opt_exact_info(&add->exb);
|
---|
4890 | }
|
---|
4891 | }
|
---|
4892 | select_opt_exact_info(enc, &to->exm, &add->exb);
|
---|
4893 | select_opt_exact_info(enc, &to->exm, &add->exm);
|
---|
4894 |
|
---|
4895 | if (to->expr.len > 0) {
|
---|
4896 | if (add->len.max > 0) {
|
---|
4897 | if (to->expr.len > (int )add->len.max)
|
---|
4898 | to->expr.len = (int )add->len.max;
|
---|
4899 |
|
---|
4900 | if (to->expr.mmd.max == 0)
|
---|
4901 | select_opt_exact_info(enc, &to->exb, &to->expr);
|
---|
4902 | else
|
---|
4903 | select_opt_exact_info(enc, &to->exm, &to->expr);
|
---|
4904 | }
|
---|
4905 | }
|
---|
4906 | else if (add->expr.len > 0) {
|
---|
4907 | copy_opt_exact_info(&to->expr, &add->expr);
|
---|
4908 | }
|
---|
4909 |
|
---|
4910 | select_opt_map_info(&to->map, &add->map);
|
---|
4911 |
|
---|
4912 | add_mml(&to->len, &add->len);
|
---|
4913 | }
|
---|
4914 |
|
---|
4915 | static void
|
---|
4916 | alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
|
---|
4917 | {
|
---|
4918 | alt_merge_opt_anc_info (&to->anc, &add->anc);
|
---|
4919 | alt_merge_opt_exact_info(&to->exb, &add->exb, env);
|
---|
4920 | alt_merge_opt_exact_info(&to->exm, &add->exm, env);
|
---|
4921 | alt_merge_opt_exact_info(&to->expr, &add->expr, env);
|
---|
4922 | alt_merge_opt_map_info(env->enc, &to->map, &add->map);
|
---|
4923 |
|
---|
4924 | alt_merge_mml(&to->len, &add->len);
|
---|
4925 | }
|
---|
4926 |
|
---|
4927 |
|
---|
4928 | #define MAX_NODE_OPT_INFO_REF_COUNT 5
|
---|
4929 |
|
---|
4930 | static int
|
---|
4931 | optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
---|
4932 | {
|
---|
4933 | int type;
|
---|
4934 | int r = 0;
|
---|
4935 |
|
---|
4936 | clear_node_opt_info(opt);
|
---|
4937 | set_bound_node_opt_info(opt, &env->mmd);
|
---|
4938 |
|
---|
4939 | type = NTYPE(node);
|
---|
4940 | switch (type) {
|
---|
4941 | case NT_LIST:
|
---|
4942 | {
|
---|
4943 | OptEnv nenv;
|
---|
4944 | NodeOptInfo nopt;
|
---|
4945 | Node* nd = node;
|
---|
4946 |
|
---|
4947 | copy_opt_env(&nenv, env);
|
---|
4948 | do {
|
---|
4949 | r = optimize_node_left(NCAR(nd), &nopt, &nenv);
|
---|
4950 | if (r == 0) {
|
---|
4951 | add_mml(&nenv.mmd, &nopt.len);
|
---|
4952 | concat_left_node_opt_info(env->enc, opt, &nopt);
|
---|
4953 | }
|
---|
4954 | } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
4955 | }
|
---|
4956 | break;
|
---|
4957 |
|
---|
4958 | case NT_ALT:
|
---|
4959 | {
|
---|
4960 | NodeOptInfo nopt;
|
---|
4961 | Node* nd = node;
|
---|
4962 |
|
---|
4963 | do {
|
---|
4964 | r = optimize_node_left(NCAR(nd), &nopt, env);
|
---|
4965 | if (r == 0) {
|
---|
4966 | if (nd == node) copy_node_opt_info(opt, &nopt);
|
---|
4967 | else alt_merge_node_opt_info(opt, &nopt, env);
|
---|
4968 | }
|
---|
4969 | } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
4970 | }
|
---|
4971 | break;
|
---|
4972 |
|
---|
4973 | case NT_STR:
|
---|
4974 | {
|
---|
4975 | StrNode* sn = NSTR(node);
|
---|
4976 | OnigDistance slen = sn->end - sn->s;
|
---|
4977 | int is_raw = NSTRING_IS_RAW(node);
|
---|
4978 |
|
---|
4979 | if (! NSTRING_IS_AMBIG(node)) {
|
---|
4980 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
4981 | is_raw, env->enc);
|
---|
4982 | opt->exb.ignore_case = 0;
|
---|
4983 | if (slen > 0) {
|
---|
4984 | add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
|
---|
4985 | }
|
---|
4986 | set_mml(&opt->len, slen, slen);
|
---|
4987 | }
|
---|
4988 | else {
|
---|
4989 | OnigDistance max;
|
---|
4990 |
|
---|
4991 | if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
|
---|
4992 | int n = onigenc_strlen(env->enc, sn->s, sn->end);
|
---|
4993 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
|
---|
4994 | }
|
---|
4995 | else {
|
---|
4996 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
4997 | is_raw, env->enc);
|
---|
4998 | opt->exb.ignore_case = 1;
|
---|
4999 |
|
---|
5000 | if (slen > 0) {
|
---|
5001 | r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
|
---|
5002 | env->enc, env->case_fold_flag);
|
---|
5003 | if (r != 0) break;
|
---|
5004 | }
|
---|
5005 |
|
---|
5006 | max = slen;
|
---|
5007 | }
|
---|
5008 |
|
---|
5009 | set_mml(&opt->len, slen, max);
|
---|
5010 | }
|
---|
5011 |
|
---|
5012 | if ((OnigDistance )opt->exb.len == slen)
|
---|
5013 | opt->exb.reach_end = 1;
|
---|
5014 | }
|
---|
5015 | break;
|
---|
5016 |
|
---|
5017 | case NT_CCLASS:
|
---|
5018 | {
|
---|
5019 | int i, z;
|
---|
5020 | CClassNode* cc = NCCLASS(node);
|
---|
5021 |
|
---|
5022 | /* no need to check ignore case. (set in setup_tree()) */
|
---|
5023 |
|
---|
5024 | if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
|
---|
5025 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
5026 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
5027 |
|
---|
5028 | set_mml(&opt->len, min, max);
|
---|
5029 | }
|
---|
5030 | else {
|
---|
5031 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
5032 | z = BITSET_AT(cc->bs, i);
|
---|
5033 | if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
|
---|
5034 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
5035 | }
|
---|
5036 | }
|
---|
5037 | set_mml(&opt->len, 1, 1);
|
---|
5038 | }
|
---|
5039 | }
|
---|
5040 | break;
|
---|
5041 |
|
---|
5042 | case NT_CTYPE:
|
---|
5043 | {
|
---|
5044 | int i, min, max;
|
---|
5045 | int maxcode;
|
---|
5046 |
|
---|
5047 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
5048 |
|
---|
5049 | if (max == 1) {
|
---|
5050 | min = 1;
|
---|
5051 |
|
---|
5052 | maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
|
---|
5053 | switch (NCTYPE(node)->ctype) {
|
---|
5054 | case ONIGENC_CTYPE_WORD:
|
---|
5055 | if (NCTYPE(node)->not != 0) {
|
---|
5056 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
5057 | if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
|
---|
5058 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
5059 | }
|
---|
5060 | }
|
---|
5061 | }
|
---|
5062 | else {
|
---|
5063 | for (i = 0; i < maxcode; i++) {
|
---|
5064 | if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
|
---|
5065 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
5066 | }
|
---|
5067 | }
|
---|
5068 | }
|
---|
5069 | break;
|
---|
5070 | }
|
---|
5071 | }
|
---|
5072 | else {
|
---|
5073 | min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
5074 | }
|
---|
5075 | set_mml(&opt->len, min, max);
|
---|
5076 | }
|
---|
5077 | break;
|
---|
5078 |
|
---|
5079 | case NT_CANY:
|
---|
5080 | {
|
---|
5081 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
5082 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
5083 | set_mml(&opt->len, min, max);
|
---|
5084 | }
|
---|
5085 | break;
|
---|
5086 |
|
---|
5087 | case NT_ANCHOR:
|
---|
5088 | switch (NANCHOR(node)->type) {
|
---|
5089 | case ANCHOR_BEGIN_BUF:
|
---|
5090 | case ANCHOR_BEGIN_POSITION:
|
---|
5091 | case ANCHOR_BEGIN_LINE:
|
---|
5092 | case ANCHOR_END_BUF:
|
---|
5093 | case ANCHOR_SEMI_END_BUF:
|
---|
5094 | case ANCHOR_END_LINE:
|
---|
5095 | case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
|
---|
5096 | case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
|
---|
5097 | add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
|
---|
5098 | break;
|
---|
5099 |
|
---|
5100 | case ANCHOR_PREC_READ:
|
---|
5101 | {
|
---|
5102 | NodeOptInfo nopt;
|
---|
5103 |
|
---|
5104 | r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
|
---|
5105 | if (r == 0) {
|
---|
5106 | if (nopt.exb.len > 0)
|
---|
5107 | copy_opt_exact_info(&opt->expr, &nopt.exb);
|
---|
5108 | else if (nopt.exm.len > 0)
|
---|
5109 | copy_opt_exact_info(&opt->expr, &nopt.exm);
|
---|
5110 |
|
---|
5111 | opt->expr.reach_end = 0;
|
---|
5112 |
|
---|
5113 | if (nopt.map.value > 0)
|
---|
5114 | copy_opt_map_info(&opt->map, &nopt.map);
|
---|
5115 | }
|
---|
5116 | }
|
---|
5117 | break;
|
---|
5118 |
|
---|
5119 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
5120 | break;
|
---|
5121 | }
|
---|
5122 | break;
|
---|
5123 |
|
---|
5124 | case NT_BREF:
|
---|
5125 | {
|
---|
5126 | int i;
|
---|
5127 | int* backs;
|
---|
5128 | OnigDistance min, max, tmin, tmax;
|
---|
5129 | Node** nodes = SCANENV_MEM_NODES(env->scan_env);
|
---|
5130 | BRefNode* br = NBREF(node);
|
---|
5131 |
|
---|
5132 | if (br->state & NST_RECURSION) {
|
---|
5133 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
5134 | break;
|
---|
5135 | }
|
---|
5136 | backs = BACKREFS_P(br);
|
---|
5137 | r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
|
---|
5138 | if (r != 0) break;
|
---|
5139 | r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
|
---|
5140 | if (r != 0) break;
|
---|
5141 | for (i = 1; i < br->back_num; i++) {
|
---|
5142 | r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
|
---|
5143 | if (r != 0) break;
|
---|
5144 | r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
|
---|
5145 | if (r != 0) break;
|
---|
5146 | if (min > tmin) min = tmin;
|
---|
5147 | if (max < tmax) max = tmax;
|
---|
5148 | }
|
---|
5149 | if (r == 0) set_mml(&opt->len, min, max);
|
---|
5150 | }
|
---|
5151 | break;
|
---|
5152 |
|
---|
5153 | #ifdef USE_SUBEXP_CALL
|
---|
5154 | case NT_CALL:
|
---|
5155 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
5156 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
5157 | else {
|
---|
5158 | OnigOptionType save = env->options;
|
---|
5159 | env->options = NENCLOSE(NCALL(node)->target)->option;
|
---|
5160 | r = optimize_node_left(NCALL(node)->target, opt, env);
|
---|
5161 | env->options = save;
|
---|
5162 | }
|
---|
5163 | break;
|
---|
5164 | #endif
|
---|
5165 |
|
---|
5166 | case NT_QTFR:
|
---|
5167 | {
|
---|
5168 | int i;
|
---|
5169 | OnigDistance min, max;
|
---|
5170 | NodeOptInfo nopt;
|
---|
5171 | QtfrNode* qn = NQTFR(node);
|
---|
5172 |
|
---|
5173 | r = optimize_node_left(qn->target, &nopt, env);
|
---|
5174 | if (r) break;
|
---|
5175 |
|
---|
5176 | if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
|
---|
5177 | if (env->mmd.max == 0 &&
|
---|
5178 | NTYPE(qn->target) == NT_CANY && qn->greedy) {
|
---|
5179 | if (IS_MULTILINE(env->options))
|
---|
5180 | /* implicit anchor: /.*a/ ==> /\A.*a/ */
|
---|
5181 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
|
---|
5182 | else
|
---|
5183 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
|
---|
5184 | }
|
---|
5185 | }
|
---|
5186 | else {
|
---|
5187 | if (qn->lower > 0) {
|
---|
5188 | copy_node_opt_info(opt, &nopt);
|
---|
5189 | if (nopt.exb.len > 0) {
|
---|
5190 | if (nopt.exb.reach_end) {
|
---|
5191 | for (i = 2; i <= qn->lower &&
|
---|
5192 | ! is_full_opt_exact_info(&opt->exb); i++) {
|
---|
5193 | concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
|
---|
5194 | }
|
---|
5195 | if (i < qn->lower) {
|
---|
5196 | opt->exb.reach_end = 0;
|
---|
5197 | }
|
---|
5198 | }
|
---|
5199 | }
|
---|
5200 |
|
---|
5201 | if (qn->lower != qn->upper) {
|
---|
5202 | opt->exb.reach_end = 0;
|
---|
5203 | opt->exm.reach_end = 0;
|
---|
5204 | }
|
---|
5205 | if (qn->lower > 1)
|
---|
5206 | opt->exm.reach_end = 0;
|
---|
5207 | }
|
---|
5208 | }
|
---|
5209 |
|
---|
5210 | min = distance_multiply(nopt.len.min, qn->lower);
|
---|
5211 | if (IS_REPEAT_INFINITE(qn->upper))
|
---|
5212 | max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
|
---|
5213 | else
|
---|
5214 | max = distance_multiply(nopt.len.max, qn->upper);
|
---|
5215 |
|
---|
5216 | set_mml(&opt->len, min, max);
|
---|
5217 | }
|
---|
5218 | break;
|
---|
5219 |
|
---|
5220 | case NT_ENCLOSE:
|
---|
5221 | {
|
---|
5222 | EncloseNode* en = NENCLOSE(node);
|
---|
5223 |
|
---|
5224 | switch (en->type) {
|
---|
5225 | case ENCLOSE_OPTION:
|
---|
5226 | {
|
---|
5227 | OnigOptionType save = env->options;
|
---|
5228 |
|
---|
5229 | env->options = en->option;
|
---|
5230 | r = optimize_node_left(en->target, opt, env);
|
---|
5231 | env->options = save;
|
---|
5232 | }
|
---|
5233 | break;
|
---|
5234 |
|
---|
5235 | case ENCLOSE_MEMORY:
|
---|
5236 | #ifdef USE_SUBEXP_CALL
|
---|
5237 | en->opt_count++;
|
---|
5238 | if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
|
---|
5239 | OnigDistance min, max;
|
---|
5240 |
|
---|
5241 | min = 0;
|
---|
5242 | max = ONIG_INFINITE_DISTANCE;
|
---|
5243 | if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
|
---|
5244 | if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
|
---|
5245 | set_mml(&opt->len, min, max);
|
---|
5246 | }
|
---|
5247 | else
|
---|
5248 | #endif
|
---|
5249 | {
|
---|
5250 | r = optimize_node_left(en->target, opt, env);
|
---|
5251 |
|
---|
5252 | if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
|
---|
5253 | if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
|
---|
5254 | remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
|
---|
5255 | }
|
---|
5256 | }
|
---|
5257 | break;
|
---|
5258 |
|
---|
5259 | case ENCLOSE_STOP_BACKTRACK:
|
---|
5260 | case ENCLOSE_CONDITION:
|
---|
5261 | r = optimize_node_left(en->target, opt, env);
|
---|
5262 | break;
|
---|
5263 | }
|
---|
5264 | }
|
---|
5265 | break;
|
---|
5266 |
|
---|
5267 | default:
|
---|
5268 | #ifdef ONIG_DEBUG
|
---|
5269 | fprintf(stderr, "optimize_node_left: undefined node type %d\n",
|
---|
5270 | NTYPE(node));
|
---|
5271 | #endif
|
---|
5272 | r = ONIGERR_TYPE_BUG;
|
---|
5273 | break;
|
---|
5274 | }
|
---|
5275 |
|
---|
5276 | return r;
|
---|
5277 | }
|
---|
5278 |
|
---|
5279 | static int
|
---|
5280 | set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
|
---|
5281 | {
|
---|
5282 | int r;
|
---|
5283 | int allow_reverse;
|
---|
5284 |
|
---|
5285 | if (e->len == 0) return 0;
|
---|
5286 |
|
---|
5287 | reg->exact = (UChar* )xmalloc(e->len);
|
---|
5288 | CHECK_NULL_RETURN_MEMERR(reg->exact);
|
---|
5289 | xmemcpy(reg->exact, e->s, e->len);
|
---|
5290 | reg->exact_end = reg->exact + e->len;
|
---|
5291 |
|
---|
5292 | allow_reverse =
|
---|
5293 | ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
|
---|
5294 |
|
---|
5295 | if (e->ignore_case > 0) {
|
---|
5296 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
5297 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
5298 | reg->map, &(reg->int_map), 1);
|
---|
5299 | if (r == 0) {
|
---|
5300 | reg->optimize = (allow_reverse != 0
|
---|
5301 | ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
|
---|
5302 | }
|
---|
5303 | else {
|
---|
5304 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
5305 | }
|
---|
5306 | }
|
---|
5307 | else {
|
---|
5308 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
5309 | }
|
---|
5310 | }
|
---|
5311 | else {
|
---|
5312 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
5313 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
5314 | reg->map, &(reg->int_map), 0);
|
---|
5315 | if (r) return r;
|
---|
5316 |
|
---|
5317 | reg->optimize = (allow_reverse != 0
|
---|
5318 | ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
|
---|
5319 | }
|
---|
5320 | else {
|
---|
5321 | reg->optimize = ONIG_OPTIMIZE_EXACT;
|
---|
5322 | }
|
---|
5323 | }
|
---|
5324 |
|
---|
5325 | reg->dmin = e->mmd.min;
|
---|
5326 | reg->dmax = e->mmd.max;
|
---|
5327 |
|
---|
5328 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
5329 | reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
|
---|
5330 | }
|
---|
5331 |
|
---|
5332 | return 0;
|
---|
5333 | }
|
---|
5334 |
|
---|
5335 | static void
|
---|
5336 | set_optimize_map_info(regex_t* reg, OptMapInfo* m)
|
---|
5337 | {
|
---|
5338 | int i;
|
---|
5339 |
|
---|
5340 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
5341 | reg->map[i] = m->map[i];
|
---|
5342 |
|
---|
5343 | reg->optimize = ONIG_OPTIMIZE_MAP;
|
---|
5344 | reg->dmin = m->mmd.min;
|
---|
5345 | reg->dmax = m->mmd.max;
|
---|
5346 |
|
---|
5347 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
5348 | reg->threshold_len = (int )(reg->dmin + 1);
|
---|
5349 | }
|
---|
5350 | }
|
---|
5351 |
|
---|
5352 | static void
|
---|
5353 | set_sub_anchor(regex_t* reg, OptAncInfo* anc)
|
---|
5354 | {
|
---|
5355 | reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
|
---|
5356 | reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
|
---|
5357 | }
|
---|
5358 |
|
---|
5359 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
5360 | static void print_optimize_info(FILE* f, regex_t* reg);
|
---|
5361 | #endif
|
---|
5362 |
|
---|
5363 | static int
|
---|
5364 | set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
|
---|
5365 | {
|
---|
5366 |
|
---|
5367 | int r;
|
---|
5368 | NodeOptInfo opt;
|
---|
5369 | OptEnv env;
|
---|
5370 |
|
---|
5371 | env.enc = reg->enc;
|
---|
5372 | env.options = reg->options;
|
---|
5373 | env.case_fold_flag = reg->case_fold_flag;
|
---|
5374 | env.scan_env = scan_env;
|
---|
5375 | clear_mml(&env.mmd);
|
---|
5376 |
|
---|
5377 | r = optimize_node_left(node, &opt, &env);
|
---|
5378 | if (r) return r;
|
---|
5379 |
|
---|
5380 | reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
|
---|
5381 | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
|
---|
5382 | ANCHOR_LOOK_BEHIND);
|
---|
5383 |
|
---|
5384 | reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
|
---|
5385 | ANCHOR_PREC_READ_NOT);
|
---|
5386 |
|
---|
5387 | if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
|
---|
5388 | reg->anchor_dmin = opt.len.min;
|
---|
5389 | reg->anchor_dmax = opt.len.max;
|
---|
5390 | }
|
---|
5391 |
|
---|
5392 | if (opt.exb.len > 0 || opt.exm.len > 0) {
|
---|
5393 | select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
|
---|
5394 | if (opt.map.value > 0 &&
|
---|
5395 | comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
|
---|
5396 | goto set_map;
|
---|
5397 | }
|
---|
5398 | else {
|
---|
5399 | r = set_optimize_exact_info(reg, &opt.exb);
|
---|
5400 | set_sub_anchor(reg, &opt.exb.anc);
|
---|
5401 | }
|
---|
5402 | }
|
---|
5403 | else if (opt.map.value > 0) {
|
---|
5404 | set_map:
|
---|
5405 | set_optimize_map_info(reg, &opt.map);
|
---|
5406 | set_sub_anchor(reg, &opt.map.anc);
|
---|
5407 | }
|
---|
5408 | else {
|
---|
5409 | reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
|
---|
5410 | if (opt.len.max == 0)
|
---|
5411 | reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
|
---|
5412 | }
|
---|
5413 |
|
---|
5414 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
5415 | print_optimize_info(stderr, reg);
|
---|
5416 | #endif
|
---|
5417 | return r;
|
---|
5418 | }
|
---|
5419 |
|
---|
5420 | static void
|
---|
5421 | clear_optimize_info(regex_t* reg)
|
---|
5422 | {
|
---|
5423 | reg->optimize = ONIG_OPTIMIZE_NONE;
|
---|
5424 | reg->anchor = 0;
|
---|
5425 | reg->anchor_dmin = 0;
|
---|
5426 | reg->anchor_dmax = 0;
|
---|
5427 | reg->sub_anchor = 0;
|
---|
5428 | reg->exact_end = (UChar* )NULL;
|
---|
5429 | reg->threshold_len = 0;
|
---|
5430 | if (IS_NOT_NULL(reg->exact)) {
|
---|
5431 | xfree(reg->exact);
|
---|
5432 | reg->exact = (UChar* )NULL;
|
---|
5433 | }
|
---|
5434 | }
|
---|
5435 |
|
---|
5436 | #ifdef ONIG_DEBUG
|
---|
5437 |
|
---|
5438 | static void print_enc_string(FILE* fp, OnigEncoding enc,
|
---|
5439 | const UChar *s, const UChar *end)
|
---|
5440 | {
|
---|
5441 | fprintf(fp, "\nPATTERN: /");
|
---|
5442 |
|
---|
5443 | if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
---|
5444 | const UChar *p;
|
---|
5445 | OnigCodePoint code;
|
---|
5446 |
|
---|
5447 | p = s;
|
---|
5448 | while (p < end) {
|
---|
5449 | code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
---|
5450 | if (code >= 0x80) {
|
---|
5451 | fprintf(fp, " 0x%04x ", (int )code);
|
---|
5452 | }
|
---|
5453 | else {
|
---|
5454 | fputc((int )code, fp);
|
---|
5455 | }
|
---|
5456 |
|
---|
5457 | p += enclen(enc, p);
|
---|
5458 | }
|
---|
5459 | }
|
---|
5460 | else {
|
---|
5461 | while (s < end) {
|
---|
5462 | fputc((int )*s, fp);
|
---|
5463 | s++;
|
---|
5464 | }
|
---|
5465 | }
|
---|
5466 |
|
---|
5467 | fprintf(fp, "/ (%s)\n", enc->name);
|
---|
5468 | }
|
---|
5469 | #endif /* ONIG_DEBUG */
|
---|
5470 |
|
---|
5471 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
5472 | static void
|
---|
5473 | print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
|
---|
5474 | {
|
---|
5475 | if (a == ONIG_INFINITE_DISTANCE)
|
---|
5476 | fputs("inf", f);
|
---|
5477 | else
|
---|
5478 | fprintf(f, "(%"PRIuPTR")", a);
|
---|
5479 |
|
---|
5480 | fputs("-", f);
|
---|
5481 |
|
---|
5482 | if (b == ONIG_INFINITE_DISTANCE)
|
---|
5483 | fputs("inf", f);
|
---|
5484 | else
|
---|
5485 | fprintf(f, "(%"PRIuPTR")", b);
|
---|
5486 | }
|
---|
5487 |
|
---|
5488 | static void
|
---|
5489 | print_anchor(FILE* f, int anchor)
|
---|
5490 | {
|
---|
5491 | int q = 0;
|
---|
5492 |
|
---|
5493 | fprintf(f, "[");
|
---|
5494 |
|
---|
5495 | if (anchor & ANCHOR_BEGIN_BUF) {
|
---|
5496 | fprintf(f, "begin-buf");
|
---|
5497 | q = 1;
|
---|
5498 | }
|
---|
5499 | if (anchor & ANCHOR_BEGIN_LINE) {
|
---|
5500 | if (q) fprintf(f, ", ");
|
---|
5501 | q = 1;
|
---|
5502 | fprintf(f, "begin-line");
|
---|
5503 | }
|
---|
5504 | if (anchor & ANCHOR_BEGIN_POSITION) {
|
---|
5505 | if (q) fprintf(f, ", ");
|
---|
5506 | q = 1;
|
---|
5507 | fprintf(f, "begin-pos");
|
---|
5508 | }
|
---|
5509 | if (anchor & ANCHOR_END_BUF) {
|
---|
5510 | if (q) fprintf(f, ", ");
|
---|
5511 | q = 1;
|
---|
5512 | fprintf(f, "end-buf");
|
---|
5513 | }
|
---|
5514 | if (anchor & ANCHOR_SEMI_END_BUF) {
|
---|
5515 | if (q) fprintf(f, ", ");
|
---|
5516 | q = 1;
|
---|
5517 | fprintf(f, "semi-end-buf");
|
---|
5518 | }
|
---|
5519 | if (anchor & ANCHOR_END_LINE) {
|
---|
5520 | if (q) fprintf(f, ", ");
|
---|
5521 | q = 1;
|
---|
5522 | fprintf(f, "end-line");
|
---|
5523 | }
|
---|
5524 | if (anchor & ANCHOR_ANYCHAR_STAR) {
|
---|
5525 | if (q) fprintf(f, ", ");
|
---|
5526 | q = 1;
|
---|
5527 | fprintf(f, "anychar-star");
|
---|
5528 | }
|
---|
5529 | if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
|
---|
5530 | if (q) fprintf(f, ", ");
|
---|
5531 | fprintf(f, "anychar-star-ml");
|
---|
5532 | }
|
---|
5533 |
|
---|
5534 | fprintf(f, "]");
|
---|
5535 | }
|
---|
5536 |
|
---|
5537 | static void
|
---|
5538 | print_optimize_info(FILE* f, regex_t* reg)
|
---|
5539 | {
|
---|
5540 | static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
|
---|
5541 | "EXACT_IC", "MAP",
|
---|
5542 | "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
|
---|
5543 |
|
---|
5544 | fprintf(f, "optimize: %s\n", on[reg->optimize]);
|
---|
5545 | fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
|
---|
5546 | if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
|
---|
5547 | print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
|
---|
5548 | fprintf(f, "\n");
|
---|
5549 |
|
---|
5550 | if (reg->optimize) {
|
---|
5551 | fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
|
---|
5552 | fprintf(f, "\n");
|
---|
5553 | }
|
---|
5554 | fprintf(f, "\n");
|
---|
5555 |
|
---|
5556 | if (reg->exact) {
|
---|
5557 | UChar *p;
|
---|
5558 | fprintf(f, "exact: [");
|
---|
5559 | for (p = reg->exact; p < reg->exact_end; p++) {
|
---|
5560 | fputc(*p, f);
|
---|
5561 | }
|
---|
5562 | fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
|
---|
5563 | }
|
---|
5564 | else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
|
---|
5565 | int c, i, n = 0;
|
---|
5566 |
|
---|
5567 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
5568 | if (reg->map[i]) n++;
|
---|
5569 |
|
---|
5570 | fprintf(f, "map: n=%d\n", n);
|
---|
5571 | if (n > 0) {
|
---|
5572 | c = 0;
|
---|
5573 | fputc('[', f);
|
---|
5574 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
5575 | if (reg->map[i] != 0) {
|
---|
5576 | if (c > 0) fputs(", ", f);
|
---|
5577 | c++;
|
---|
5578 | if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
|
---|
5579 | ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
|
---|
5580 | fputc(i, f);
|
---|
5581 | else
|
---|
5582 | fprintf(f, "%d", i);
|
---|
5583 | }
|
---|
5584 | }
|
---|
5585 | fprintf(f, "]\n");
|
---|
5586 | }
|
---|
5587 | }
|
---|
5588 | }
|
---|
5589 | #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
|
---|
5590 |
|
---|
5591 |
|
---|
5592 | extern void
|
---|
5593 | onig_free_body(regex_t* reg)
|
---|
5594 | {
|
---|
5595 | if (IS_NOT_NULL(reg)) {
|
---|
5596 | if (IS_NOT_NULL(reg->p)) xfree(reg->p);
|
---|
5597 | if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
|
---|
5598 | if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
|
---|
5599 | if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
|
---|
5600 | if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
|
---|
5601 | if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
|
---|
5602 |
|
---|
5603 | #ifdef USE_NAMED_GROUP
|
---|
5604 | onig_names_free(reg);
|
---|
5605 | #endif
|
---|
5606 | }
|
---|
5607 | }
|
---|
5608 |
|
---|
5609 | extern void
|
---|
5610 | onig_free(regex_t* reg)
|
---|
5611 | {
|
---|
5612 | if (IS_NOT_NULL(reg)) {
|
---|
5613 | onig_free_body(reg);
|
---|
5614 | xfree(reg);
|
---|
5615 | }
|
---|
5616 | }
|
---|
5617 |
|
---|
5618 | size_t
|
---|
5619 | onig_memsize(const regex_t *reg)
|
---|
5620 | {
|
---|
5621 | size_t size = sizeof(regex_t);
|
---|
5622 | if (IS_NULL(reg)) return 0;
|
---|
5623 | if (IS_NOT_NULL(reg->p)) size += reg->alloc;
|
---|
5624 | if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
|
---|
5625 | if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
5626 | if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
5627 | if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
|
---|
5628 | if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
|
---|
5629 |
|
---|
5630 | return size;
|
---|
5631 | }
|
---|
5632 |
|
---|
5633 | size_t
|
---|
5634 | onig_region_memsize(const OnigRegion *regs)
|
---|
5635 | {
|
---|
5636 | size_t size = sizeof(*regs);
|
---|
5637 | if (IS_NULL(regs)) return 0;
|
---|
5638 | size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
|
---|
5639 | return size;
|
---|
5640 | }
|
---|
5641 |
|
---|
5642 | #define REGEX_TRANSFER(to,from) do {\
|
---|
5643 | (to)->state = ONIG_STATE_MODIFY;\
|
---|
5644 | onig_free_body(to);\
|
---|
5645 | xmemcpy(to, from, sizeof(regex_t));\
|
---|
5646 | xfree(from);\
|
---|
5647 | } while (0)
|
---|
5648 |
|
---|
5649 | extern void
|
---|
5650 | onig_transfer(regex_t* to, regex_t* from)
|
---|
5651 | {
|
---|
5652 | THREAD_ATOMIC_START;
|
---|
5653 | REGEX_TRANSFER(to, from);
|
---|
5654 | THREAD_ATOMIC_END;
|
---|
5655 | }
|
---|
5656 |
|
---|
5657 | #define REGEX_CHAIN_HEAD(reg) do {\
|
---|
5658 | while (IS_NOT_NULL((reg)->chain)) {\
|
---|
5659 | (reg) = (reg)->chain;\
|
---|
5660 | }\
|
---|
5661 | } while (0)
|
---|
5662 |
|
---|
5663 | extern void
|
---|
5664 | onig_chain_link_add(regex_t* to, regex_t* add)
|
---|
5665 | {
|
---|
5666 | THREAD_ATOMIC_START;
|
---|
5667 | REGEX_CHAIN_HEAD(to);
|
---|
5668 | to->chain = add;
|
---|
5669 | THREAD_ATOMIC_END;
|
---|
5670 | }
|
---|
5671 |
|
---|
5672 | extern void
|
---|
5673 | onig_chain_reduce(regex_t* reg)
|
---|
5674 | {
|
---|
5675 | regex_t *head, *prev;
|
---|
5676 |
|
---|
5677 | prev = reg;
|
---|
5678 | head = prev->chain;
|
---|
5679 | if (IS_NOT_NULL(head)) {
|
---|
5680 | reg->state = ONIG_STATE_MODIFY;
|
---|
5681 | while (IS_NOT_NULL(head->chain)) {
|
---|
5682 | prev = head;
|
---|
5683 | head = head->chain;
|
---|
5684 | }
|
---|
5685 | prev->chain = (regex_t* )NULL;
|
---|
5686 | REGEX_TRANSFER(reg, head);
|
---|
5687 | }
|
---|
5688 | }
|
---|
5689 |
|
---|
5690 | #ifdef ONIG_DEBUG_COMPILE
|
---|
5691 | static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
|
---|
5692 | #endif
|
---|
5693 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
5694 | static void print_tree P_((FILE* f, Node* node));
|
---|
5695 | #endif
|
---|
5696 |
|
---|
5697 | extern int
|
---|
5698 | onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
5699 | OnigErrorInfo* einfo)
|
---|
5700 | {
|
---|
5701 | #define COMPILE_INIT_SIZE 20
|
---|
5702 |
|
---|
5703 | int r;
|
---|
5704 | OnigDistance init_size;
|
---|
5705 | Node* root;
|
---|
5706 | ScanEnv scan_env = {0};
|
---|
5707 | #ifdef USE_SUBEXP_CALL
|
---|
5708 | UnsetAddrList uslist;
|
---|
5709 | #endif
|
---|
5710 |
|
---|
5711 | if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
|
---|
5712 |
|
---|
5713 | reg->state = ONIG_STATE_COMPILING;
|
---|
5714 |
|
---|
5715 | #ifdef ONIG_DEBUG
|
---|
5716 | print_enc_string(stderr, reg->enc, pattern, pattern_end);
|
---|
5717 | #endif
|
---|
5718 |
|
---|
5719 | if (reg->alloc == 0) {
|
---|
5720 | init_size = (pattern_end - pattern) * 2;
|
---|
5721 | if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
|
---|
5722 | r = BBUF_INIT(reg, init_size);
|
---|
5723 | if (r != 0) goto end;
|
---|
5724 | }
|
---|
5725 | else
|
---|
5726 | reg->used = 0;
|
---|
5727 |
|
---|
5728 | reg->num_mem = 0;
|
---|
5729 | reg->num_repeat = 0;
|
---|
5730 | reg->num_null_check = 0;
|
---|
5731 | reg->repeat_range_alloc = 0;
|
---|
5732 | reg->repeat_range = (OnigRepeatRange* )NULL;
|
---|
5733 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
5734 | reg->num_comb_exp_check = 0;
|
---|
5735 | #endif
|
---|
5736 |
|
---|
5737 | r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
|
---|
5738 | if (r != 0) goto err;
|
---|
5739 |
|
---|
5740 | #ifdef USE_NAMED_GROUP
|
---|
5741 | /* mixed use named group and no-named group */
|
---|
5742 | if (scan_env.num_named > 0 &&
|
---|
5743 | IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
5744 | !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
5745 | if (scan_env.num_named != scan_env.num_mem)
|
---|
5746 | r = disable_noname_group_capture(&root, reg, &scan_env);
|
---|
5747 | else
|
---|
5748 | r = numbered_ref_check(root);
|
---|
5749 |
|
---|
5750 | if (r != 0) goto err;
|
---|
5751 | }
|
---|
5752 | #endif
|
---|
5753 |
|
---|
5754 | #ifdef USE_SUBEXP_CALL
|
---|
5755 | if (scan_env.num_call > 0) {
|
---|
5756 | r = unset_addr_list_init(&uslist, scan_env.num_call);
|
---|
5757 | if (r != 0) goto err;
|
---|
5758 | scan_env.unset_addr_list = &uslist;
|
---|
5759 | r = setup_subexp_call(root, &scan_env);
|
---|
5760 | if (r != 0) goto err_unset;
|
---|
5761 | r = subexp_recursive_check_trav(root, &scan_env);
|
---|
5762 | if (r < 0) goto err_unset;
|
---|
5763 | r = subexp_inf_recursive_check_trav(root, &scan_env);
|
---|
5764 | if (r != 0) goto err_unset;
|
---|
5765 |
|
---|
5766 | reg->num_call = scan_env.num_call;
|
---|
5767 | }
|
---|
5768 | else
|
---|
5769 | reg->num_call = 0;
|
---|
5770 | #endif
|
---|
5771 |
|
---|
5772 | r = setup_tree(root, reg, IN_ROOT, &scan_env);
|
---|
5773 | if (r != 0) goto err_unset;
|
---|
5774 |
|
---|
5775 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
5776 | print_tree(stderr, root);
|
---|
5777 | #endif
|
---|
5778 |
|
---|
5779 | reg->capture_history = scan_env.capture_history;
|
---|
5780 | reg->bt_mem_start = scan_env.bt_mem_start;
|
---|
5781 | reg->bt_mem_start |= reg->capture_history;
|
---|
5782 | if (IS_FIND_CONDITION(reg->options))
|
---|
5783 | BIT_STATUS_ON_ALL(reg->bt_mem_end);
|
---|
5784 | else {
|
---|
5785 | reg->bt_mem_end = scan_env.bt_mem_end;
|
---|
5786 | reg->bt_mem_end |= reg->capture_history;
|
---|
5787 | }
|
---|
5788 |
|
---|
5789 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
5790 | if (scan_env.backrefed_mem == 0
|
---|
5791 | #ifdef USE_SUBEXP_CALL
|
---|
5792 | || scan_env.num_call == 0
|
---|
5793 | #endif
|
---|
5794 | ) {
|
---|
5795 | setup_comb_exp_check(root, 0, &scan_env);
|
---|
5796 | #ifdef USE_SUBEXP_CALL
|
---|
5797 | if (scan_env.has_recursion != 0) {
|
---|
5798 | scan_env.num_comb_exp_check = 0;
|
---|
5799 | }
|
---|
5800 | else
|
---|
5801 | #endif
|
---|
5802 | if (scan_env.comb_exp_max_regnum > 0) {
|
---|
5803 | int i;
|
---|
5804 | for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
|
---|
5805 | if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
|
---|
5806 | scan_env.num_comb_exp_check = 0;
|
---|
5807 | break;
|
---|
5808 | }
|
---|
5809 | }
|
---|
5810 | }
|
---|
5811 | }
|
---|
5812 |
|
---|
5813 | reg->num_comb_exp_check = scan_env.num_comb_exp_check;
|
---|
5814 | #endif
|
---|
5815 |
|
---|
5816 | clear_optimize_info(reg);
|
---|
5817 | #ifndef ONIG_DONT_OPTIMIZE
|
---|
5818 | r = set_optimize_info_from_tree(root, reg, &scan_env);
|
---|
5819 | if (r != 0) goto err_unset;
|
---|
5820 | #endif
|
---|
5821 |
|
---|
5822 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
|
---|
5823 | xfree(scan_env.mem_nodes_dynamic);
|
---|
5824 | scan_env.mem_nodes_dynamic = (Node** )NULL;
|
---|
5825 | }
|
---|
5826 |
|
---|
5827 | r = compile_tree(root, reg);
|
---|
5828 | if (r == 0) {
|
---|
5829 | r = add_opcode(reg, OP_END);
|
---|
5830 | #ifdef USE_SUBEXP_CALL
|
---|
5831 | if (scan_env.num_call > 0) {
|
---|
5832 | r = unset_addr_list_fix(&uslist, reg);
|
---|
5833 | unset_addr_list_end(&uslist);
|
---|
5834 | if (r) goto err;
|
---|
5835 | }
|
---|
5836 | #endif
|
---|
5837 |
|
---|
5838 | if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
|
---|
5839 | reg->stack_pop_level = STACK_POP_LEVEL_ALL;
|
---|
5840 | else {
|
---|
5841 | if (reg->bt_mem_start != 0)
|
---|
5842 | reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
|
---|
5843 | else
|
---|
5844 | reg->stack_pop_level = STACK_POP_LEVEL_FREE;
|
---|
5845 | }
|
---|
5846 | }
|
---|
5847 | #ifdef USE_SUBEXP_CALL
|
---|
5848 | else if (scan_env.num_call > 0) {
|
---|
5849 | unset_addr_list_end(&uslist);
|
---|
5850 | }
|
---|
5851 | #endif
|
---|
5852 | onig_node_free(root);
|
---|
5853 |
|
---|
5854 | #ifdef ONIG_DEBUG_COMPILE
|
---|
5855 | #ifdef USE_NAMED_GROUP
|
---|
5856 | onig_print_names(stderr, reg);
|
---|
5857 | #endif
|
---|
5858 | print_compiled_byte_code_list(stderr, reg);
|
---|
5859 | #endif
|
---|
5860 |
|
---|
5861 | end:
|
---|
5862 | reg->state = ONIG_STATE_NORMAL;
|
---|
5863 | return r;
|
---|
5864 |
|
---|
5865 | err_unset:
|
---|
5866 | #ifdef USE_SUBEXP_CALL
|
---|
5867 | if (scan_env.num_call > 0) {
|
---|
5868 | unset_addr_list_end(&uslist);
|
---|
5869 | }
|
---|
5870 | #endif
|
---|
5871 | err:
|
---|
5872 | if (IS_NOT_NULL(scan_env.error)) {
|
---|
5873 | if (IS_NOT_NULL(einfo)) {
|
---|
5874 | einfo->enc = scan_env.enc;
|
---|
5875 | einfo->par = scan_env.error;
|
---|
5876 | einfo->par_end = scan_env.error_end;
|
---|
5877 | }
|
---|
5878 | }
|
---|
5879 |
|
---|
5880 | onig_node_free(root);
|
---|
5881 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
|
---|
5882 | xfree(scan_env.mem_nodes_dynamic);
|
---|
5883 | return r;
|
---|
5884 | }
|
---|
5885 |
|
---|
5886 | #ifdef USE_RECOMPILE_API
|
---|
5887 | extern int
|
---|
5888 | onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
5889 | OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
---|
5890 | OnigErrorInfo* einfo)
|
---|
5891 | {
|
---|
5892 | int r;
|
---|
5893 | regex_t *new_reg;
|
---|
5894 |
|
---|
5895 | r = onig_new(&new_reg, pattern, pattern_end, option, enc, syntax, einfo);
|
---|
5896 | if (r) return r;
|
---|
5897 | if (ONIG_STATE(reg) == ONIG_STATE_NORMAL) {
|
---|
5898 | onig_transfer(reg, new_reg);
|
---|
5899 | }
|
---|
5900 | else {
|
---|
5901 | onig_chain_link_add(reg, new_reg);
|
---|
5902 | }
|
---|
5903 | return 0;
|
---|
5904 | }
|
---|
5905 | #endif
|
---|
5906 |
|
---|
5907 | static int onig_inited = 0;
|
---|
5908 |
|
---|
5909 | extern int
|
---|
5910 | onig_reg_init(regex_t* reg, OnigOptionType option,
|
---|
5911 | OnigCaseFoldType case_fold_flag,
|
---|
5912 | OnigEncoding enc, OnigSyntaxType* syntax)
|
---|
5913 | {
|
---|
5914 | if (! onig_inited)
|
---|
5915 | onig_init();
|
---|
5916 |
|
---|
5917 | if (IS_NULL(reg))
|
---|
5918 | return ONIGERR_INVALID_ARGUMENT;
|
---|
5919 |
|
---|
5920 | if (ONIGENC_IS_UNDEF(enc))
|
---|
5921 | return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
|
---|
5922 |
|
---|
5923 | if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
|
---|
5924 | == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
5925 | return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
|
---|
5926 | }
|
---|
5927 |
|
---|
5928 | (reg)->state = ONIG_STATE_MODIFY;
|
---|
5929 |
|
---|
5930 | if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
|
---|
5931 | option |= syntax->options;
|
---|
5932 | option &= ~ONIG_OPTION_SINGLELINE;
|
---|
5933 | }
|
---|
5934 | else
|
---|
5935 | option |= syntax->options;
|
---|
5936 |
|
---|
5937 | (reg)->enc = enc;
|
---|
5938 | (reg)->options = option;
|
---|
5939 | (reg)->syntax = syntax;
|
---|
5940 | (reg)->optimize = 0;
|
---|
5941 | (reg)->exact = (UChar* )NULL;
|
---|
5942 | (reg)->int_map = (int* )NULL;
|
---|
5943 | (reg)->int_map_backward = (int* )NULL;
|
---|
5944 | (reg)->chain = (regex_t* )NULL;
|
---|
5945 |
|
---|
5946 | (reg)->p = (UChar* )NULL;
|
---|
5947 | (reg)->alloc = 0;
|
---|
5948 | (reg)->used = 0;
|
---|
5949 | (reg)->name_table = (void* )NULL;
|
---|
5950 |
|
---|
5951 | (reg)->case_fold_flag = case_fold_flag;
|
---|
5952 | return 0;
|
---|
5953 | }
|
---|
5954 |
|
---|
5955 | extern int
|
---|
5956 | onig_new_without_alloc(regex_t* reg, const UChar* pattern,
|
---|
5957 | const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
|
---|
5958 | OnigSyntaxType* syntax, OnigErrorInfo* einfo)
|
---|
5959 | {
|
---|
5960 | int r;
|
---|
5961 |
|
---|
5962 | r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
5963 | if (r) return r;
|
---|
5964 |
|
---|
5965 | r = onig_compile(reg, pattern, pattern_end, einfo);
|
---|
5966 | return r;
|
---|
5967 | }
|
---|
5968 |
|
---|
5969 | extern int
|
---|
5970 | onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
---|
5971 | OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
---|
5972 | OnigErrorInfo* einfo)
|
---|
5973 | {
|
---|
5974 | int r;
|
---|
5975 |
|
---|
5976 | *reg = (regex_t* )xmalloc(sizeof(regex_t));
|
---|
5977 | if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
---|
5978 |
|
---|
5979 | r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
5980 | if (r) goto err;
|
---|
5981 |
|
---|
5982 | r = onig_compile(*reg, pattern, pattern_end, einfo);
|
---|
5983 | if (r) {
|
---|
5984 | err:
|
---|
5985 | onig_free(*reg);
|
---|
5986 | *reg = NULL;
|
---|
5987 | }
|
---|
5988 | return r;
|
---|
5989 | }
|
---|
5990 |
|
---|
5991 |
|
---|
5992 | extern int
|
---|
5993 | onig_init(void)
|
---|
5994 | {
|
---|
5995 | if (onig_inited != 0)
|
---|
5996 | return 0;
|
---|
5997 |
|
---|
5998 | THREAD_SYSTEM_INIT;
|
---|
5999 | THREAD_ATOMIC_START;
|
---|
6000 |
|
---|
6001 | onig_inited = 1;
|
---|
6002 |
|
---|
6003 | onigenc_init();
|
---|
6004 | /* onigenc_set_default_caseconv_table((UChar* )0); */
|
---|
6005 |
|
---|
6006 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
6007 | onig_statistics_init();
|
---|
6008 | #endif
|
---|
6009 |
|
---|
6010 | THREAD_ATOMIC_END;
|
---|
6011 | return 0;
|
---|
6012 | }
|
---|
6013 |
|
---|
6014 |
|
---|
6015 | static OnigEndCallListItemType* EndCallTop;
|
---|
6016 |
|
---|
6017 | extern void onig_add_end_call(void (*func)(void))
|
---|
6018 | {
|
---|
6019 | OnigEndCallListItemType* item;
|
---|
6020 |
|
---|
6021 | item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
|
---|
6022 | if (item == 0) return ;
|
---|
6023 |
|
---|
6024 | item->next = EndCallTop;
|
---|
6025 | item->func = func;
|
---|
6026 |
|
---|
6027 | EndCallTop = item;
|
---|
6028 | }
|
---|
6029 |
|
---|
6030 | static void
|
---|
6031 | exec_end_call_list(void)
|
---|
6032 | {
|
---|
6033 | OnigEndCallListItemType* prev;
|
---|
6034 | void (*func)(void);
|
---|
6035 |
|
---|
6036 | while (EndCallTop != 0) {
|
---|
6037 | func = EndCallTop->func;
|
---|
6038 | (*func)();
|
---|
6039 |
|
---|
6040 | prev = EndCallTop;
|
---|
6041 | EndCallTop = EndCallTop->next;
|
---|
6042 | xfree(prev);
|
---|
6043 | }
|
---|
6044 | }
|
---|
6045 |
|
---|
6046 | extern int
|
---|
6047 | onig_end(void)
|
---|
6048 | {
|
---|
6049 | THREAD_ATOMIC_START;
|
---|
6050 |
|
---|
6051 | exec_end_call_list();
|
---|
6052 |
|
---|
6053 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
6054 | onig_print_statistics(stderr);
|
---|
6055 | #endif
|
---|
6056 |
|
---|
6057 | #ifdef USE_SHARED_CCLASS_TABLE
|
---|
6058 | onig_free_shared_cclass_table();
|
---|
6059 | #endif
|
---|
6060 |
|
---|
6061 | #ifdef USE_PARSE_TREE_NODE_RECYCLE
|
---|
6062 | onig_free_node_list();
|
---|
6063 | #endif
|
---|
6064 |
|
---|
6065 | onig_inited = 0;
|
---|
6066 |
|
---|
6067 | THREAD_ATOMIC_END;
|
---|
6068 | THREAD_SYSTEM_END;
|
---|
6069 | return 0;
|
---|
6070 | }
|
---|
6071 |
|
---|
6072 | extern int
|
---|
6073 | onig_is_in_code_range(const UChar* p, OnigCodePoint code)
|
---|
6074 | {
|
---|
6075 | OnigCodePoint n, *data;
|
---|
6076 | OnigCodePoint low, high, x;
|
---|
6077 |
|
---|
6078 | GET_CODE_POINT(n, p);
|
---|
6079 | data = (OnigCodePoint* )p;
|
---|
6080 | data++;
|
---|
6081 |
|
---|
6082 | for (low = 0, high = n; low < high; ) {
|
---|
6083 | x = (low + high) >> 1;
|
---|
6084 | if (code > data[x * 2 + 1])
|
---|
6085 | low = x + 1;
|
---|
6086 | else
|
---|
6087 | high = x;
|
---|
6088 | }
|
---|
6089 |
|
---|
6090 | return ((low < n && code >= data[low * 2]) ? 1 : 0);
|
---|
6091 | }
|
---|
6092 |
|
---|
6093 | extern int
|
---|
6094 | onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
|
---|
6095 | {
|
---|
6096 | int found;
|
---|
6097 |
|
---|
6098 | if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
|
---|
6099 | if (IS_NULL(cc->mbuf)) {
|
---|
6100 | found = 0;
|
---|
6101 | }
|
---|
6102 | else {
|
---|
6103 | found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
|
---|
6104 | }
|
---|
6105 | }
|
---|
6106 | else {
|
---|
6107 | found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
|
---|
6108 | }
|
---|
6109 |
|
---|
6110 | if (IS_NCCLASS_NOT(cc))
|
---|
6111 | return !found;
|
---|
6112 | else
|
---|
6113 | return found;
|
---|
6114 | }
|
---|
6115 |
|
---|
6116 | extern int
|
---|
6117 | onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
|
---|
6118 | {
|
---|
6119 | int len;
|
---|
6120 |
|
---|
6121 | if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
---|
6122 | len = 2;
|
---|
6123 | }
|
---|
6124 | else {
|
---|
6125 | len = ONIGENC_CODE_TO_MBCLEN(enc, code);
|
---|
6126 | }
|
---|
6127 | return onig_is_code_in_cc_len(len, code, cc);
|
---|
6128 | }
|
---|
6129 |
|
---|
6130 |
|
---|
6131 | #ifdef ONIG_DEBUG
|
---|
6132 |
|
---|
6133 | /* arguments type */
|
---|
6134 | #define ARG_SPECIAL -1
|
---|
6135 | #define ARG_NON 0
|
---|
6136 | #define ARG_RELADDR 1
|
---|
6137 | #define ARG_ABSADDR 2
|
---|
6138 | #define ARG_LENGTH 3
|
---|
6139 | #define ARG_MEMNUM 4
|
---|
6140 | #define ARG_OPTION 5
|
---|
6141 | #define ARG_STATE_CHECK 6
|
---|
6142 |
|
---|
6143 | OnigOpInfoType OnigOpInfo[] = {
|
---|
6144 | { OP_FINISH, "finish", ARG_NON },
|
---|
6145 | { OP_END, "end", ARG_NON },
|
---|
6146 | { OP_EXACT1, "exact1", ARG_SPECIAL },
|
---|
6147 | { OP_EXACT2, "exact2", ARG_SPECIAL },
|
---|
6148 | { OP_EXACT3, "exact3", ARG_SPECIAL },
|
---|
6149 | { OP_EXACT4, "exact4", ARG_SPECIAL },
|
---|
6150 | { OP_EXACT5, "exact5", ARG_SPECIAL },
|
---|
6151 | { OP_EXACTN, "exactn", ARG_SPECIAL },
|
---|
6152 | { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
|
---|
6153 | { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
|
---|
6154 | { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
|
---|
6155 | { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
|
---|
6156 | { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
|
---|
6157 | { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
|
---|
6158 | { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
|
---|
6159 | { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
|
---|
6160 | { OP_CCLASS, "cclass", ARG_SPECIAL },
|
---|
6161 | { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
|
---|
6162 | { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
|
---|
6163 | { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
|
---|
6164 | { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
|
---|
6165 | { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
|
---|
6166 | { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
|
---|
6167 | { OP_ANYCHAR, "anychar", ARG_NON },
|
---|
6168 | { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
|
---|
6169 | { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
|
---|
6170 | { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
|
---|
6171 | { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
|
---|
6172 | { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
|
---|
6173 | { OP_WORD, "word", ARG_NON },
|
---|
6174 | { OP_NOT_WORD, "not-word", ARG_NON },
|
---|
6175 | { OP_WORD_BOUND, "word-bound", ARG_NON },
|
---|
6176 | { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
|
---|
6177 | { OP_WORD_BEGIN, "word-begin", ARG_NON },
|
---|
6178 | { OP_WORD_END, "word-end", ARG_NON },
|
---|
6179 | { OP_ASCII_WORD, "ascii-word", ARG_NON },
|
---|
6180 | { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
|
---|
6181 | { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
|
---|
6182 | { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
|
---|
6183 | { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
|
---|
6184 | { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
|
---|
6185 | { OP_BEGIN_BUF, "begin-buf", ARG_NON },
|
---|
6186 | { OP_END_BUF, "end-buf", ARG_NON },
|
---|
6187 | { OP_BEGIN_LINE, "begin-line", ARG_NON },
|
---|
6188 | { OP_END_LINE, "end-line", ARG_NON },
|
---|
6189 | { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
|
---|
6190 | { OP_BEGIN_POSITION, "begin-position", ARG_NON },
|
---|
6191 | { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
|
---|
6192 | { OP_BACKREF1, "backref1", ARG_NON },
|
---|
6193 | { OP_BACKREF2, "backref2", ARG_NON },
|
---|
6194 | { OP_BACKREFN, "backrefn", ARG_MEMNUM },
|
---|
6195 | { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
|
---|
6196 | { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
|
---|
6197 | { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
|
---|
6198 | { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
|
---|
6199 | { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
|
---|
6200 | { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
|
---|
6201 | { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
|
---|
6202 | { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
|
---|
6203 | { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
|
---|
6204 | { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
|
---|
6205 | { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
|
---|
6206 | { OP_SET_OPTION, "set-option", ARG_OPTION },
|
---|
6207 | { OP_KEEP, "keep", ARG_NON },
|
---|
6208 | { OP_FAIL, "fail", ARG_NON },
|
---|
6209 | { OP_JUMP, "jump", ARG_RELADDR },
|
---|
6210 | { OP_PUSH, "push", ARG_RELADDR },
|
---|
6211 | { OP_POP, "pop", ARG_NON },
|
---|
6212 | { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
|
---|
6213 | { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
|
---|
6214 | { OP_REPEAT, "repeat", ARG_SPECIAL },
|
---|
6215 | { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
|
---|
6216 | { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
|
---|
6217 | { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
|
---|
6218 | { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
|
---|
6219 | { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
|
---|
6220 | { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
|
---|
6221 | { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
|
---|
6222 | { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
|
---|
6223 | { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
|
---|
6224 | { OP_PUSH_POS, "push-pos", ARG_NON },
|
---|
6225 | { OP_POP_POS, "pop-pos", ARG_NON },
|
---|
6226 | { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
|
---|
6227 | { OP_FAIL_POS, "fail-pos", ARG_NON },
|
---|
6228 | { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
|
---|
6229 | { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
|
---|
6230 | { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
|
---|
6231 | { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
|
---|
6232 | { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
|
---|
6233 | { OP_CALL, "call", ARG_ABSADDR },
|
---|
6234 | { OP_RETURN, "return", ARG_NON },
|
---|
6235 | { OP_CONDITION, "condition", ARG_SPECIAL },
|
---|
6236 | { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
|
---|
6237 | { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
|
---|
6238 | { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
|
---|
6239 | { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
|
---|
6240 | { OP_STATE_CHECK_ANYCHAR_ML_STAR,
|
---|
6241 | "state-check-anychar-ml*", ARG_STATE_CHECK },
|
---|
6242 | { -1, "", ARG_NON }
|
---|
6243 | };
|
---|
6244 |
|
---|
6245 | static const char*
|
---|
6246 | op2name(int opcode)
|
---|
6247 | {
|
---|
6248 | int i;
|
---|
6249 |
|
---|
6250 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
6251 | if (opcode == OnigOpInfo[i].opcode)
|
---|
6252 | return OnigOpInfo[i].name;
|
---|
6253 | }
|
---|
6254 | return "";
|
---|
6255 | }
|
---|
6256 |
|
---|
6257 | static int
|
---|
6258 | op2arg_type(int opcode)
|
---|
6259 | {
|
---|
6260 | int i;
|
---|
6261 |
|
---|
6262 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
6263 | if (opcode == OnigOpInfo[i].opcode)
|
---|
6264 | return OnigOpInfo[i].arg_type;
|
---|
6265 | }
|
---|
6266 | return ARG_SPECIAL;
|
---|
6267 | }
|
---|
6268 |
|
---|
6269 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
6270 | static void
|
---|
6271 | Indent(FILE* f, int indent)
|
---|
6272 | {
|
---|
6273 | int i;
|
---|
6274 | for (i = 0; i < indent; i++) putc(' ', f);
|
---|
6275 | }
|
---|
6276 | #endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
6277 |
|
---|
6278 | static void
|
---|
6279 | p_string(FILE* f, ptrdiff_t len, UChar* s)
|
---|
6280 | {
|
---|
6281 | fputs(":", f);
|
---|
6282 | while (len-- > 0) { fputc(*s++, f); }
|
---|
6283 | }
|
---|
6284 |
|
---|
6285 | static void
|
---|
6286 | p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
|
---|
6287 | {
|
---|
6288 | int x = len * mb_len;
|
---|
6289 |
|
---|
6290 | fprintf(f, ":%d:", len);
|
---|
6291 | while (x-- > 0) { fputc(*s++, f); }
|
---|
6292 | }
|
---|
6293 |
|
---|
6294 | extern void
|
---|
6295 | onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
|
---|
6296 | OnigEncoding enc)
|
---|
6297 | {
|
---|
6298 | int i, n, arg_type;
|
---|
6299 | RelAddrType addr;
|
---|
6300 | LengthType len;
|
---|
6301 | MemNumType mem;
|
---|
6302 | StateCheckNumType scn;
|
---|
6303 | OnigCodePoint code;
|
---|
6304 | UChar *q;
|
---|
6305 |
|
---|
6306 | fprintf(f, "[%s", op2name(*bp));
|
---|
6307 | arg_type = op2arg_type(*bp);
|
---|
6308 | if (arg_type != ARG_SPECIAL) {
|
---|
6309 | bp++;
|
---|
6310 | switch (arg_type) {
|
---|
6311 | case ARG_NON:
|
---|
6312 | break;
|
---|
6313 | case ARG_RELADDR:
|
---|
6314 | GET_RELADDR_INC(addr, bp);
|
---|
6315 | fprintf(f, ":(+%d)", addr);
|
---|
6316 | break;
|
---|
6317 | case ARG_ABSADDR:
|
---|
6318 | GET_ABSADDR_INC(addr, bp);
|
---|
6319 | fprintf(f, ":(%d)", addr);
|
---|
6320 | break;
|
---|
6321 | case ARG_LENGTH:
|
---|
6322 | GET_LENGTH_INC(len, bp);
|
---|
6323 | fprintf(f, ":%d", len);
|
---|
6324 | break;
|
---|
6325 | case ARG_MEMNUM:
|
---|
6326 | mem = *((MemNumType* )bp);
|
---|
6327 | bp += SIZE_MEMNUM;
|
---|
6328 | fprintf(f, ":%d", mem);
|
---|
6329 | break;
|
---|
6330 | case ARG_OPTION:
|
---|
6331 | {
|
---|
6332 | OnigOptionType option = *((OnigOptionType* )bp);
|
---|
6333 | bp += SIZE_OPTION;
|
---|
6334 | fprintf(f, ":%d", option);
|
---|
6335 | }
|
---|
6336 | break;
|
---|
6337 |
|
---|
6338 | case ARG_STATE_CHECK:
|
---|
6339 | scn = *((StateCheckNumType* )bp);
|
---|
6340 | bp += SIZE_STATE_CHECK_NUM;
|
---|
6341 | fprintf(f, ":%d", scn);
|
---|
6342 | break;
|
---|
6343 | }
|
---|
6344 | }
|
---|
6345 | else {
|
---|
6346 | switch (*bp++) {
|
---|
6347 | case OP_EXACT1:
|
---|
6348 | case OP_ANYCHAR_STAR_PEEK_NEXT:
|
---|
6349 | case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
|
---|
6350 | p_string(f, 1, bp++); break;
|
---|
6351 | case OP_EXACT2:
|
---|
6352 | p_string(f, 2, bp); bp += 2; break;
|
---|
6353 | case OP_EXACT3:
|
---|
6354 | p_string(f, 3, bp); bp += 3; break;
|
---|
6355 | case OP_EXACT4:
|
---|
6356 | p_string(f, 4, bp); bp += 4; break;
|
---|
6357 | case OP_EXACT5:
|
---|
6358 | p_string(f, 5, bp); bp += 5; break;
|
---|
6359 | case OP_EXACTN:
|
---|
6360 | GET_LENGTH_INC(len, bp);
|
---|
6361 | p_len_string(f, len, 1, bp);
|
---|
6362 | bp += len;
|
---|
6363 | break;
|
---|
6364 |
|
---|
6365 | case OP_EXACTMB2N1:
|
---|
6366 | p_string(f, 2, bp); bp += 2; break;
|
---|
6367 | case OP_EXACTMB2N2:
|
---|
6368 | p_string(f, 4, bp); bp += 4; break;
|
---|
6369 | case OP_EXACTMB2N3:
|
---|
6370 | p_string(f, 6, bp); bp += 6; break;
|
---|
6371 | case OP_EXACTMB2N:
|
---|
6372 | GET_LENGTH_INC(len, bp);
|
---|
6373 | p_len_string(f, len, 2, bp);
|
---|
6374 | bp += len * 2;
|
---|
6375 | break;
|
---|
6376 | case OP_EXACTMB3N:
|
---|
6377 | GET_LENGTH_INC(len, bp);
|
---|
6378 | p_len_string(f, len, 3, bp);
|
---|
6379 | bp += len * 3;
|
---|
6380 | break;
|
---|
6381 | case OP_EXACTMBN:
|
---|
6382 | {
|
---|
6383 | int mb_len;
|
---|
6384 |
|
---|
6385 | GET_LENGTH_INC(mb_len, bp);
|
---|
6386 | GET_LENGTH_INC(len, bp);
|
---|
6387 | fprintf(f, ":%d:%d:", mb_len, len);
|
---|
6388 | n = len * mb_len;
|
---|
6389 | while (n-- > 0) { fputc(*bp++, f); }
|
---|
6390 | }
|
---|
6391 | break;
|
---|
6392 |
|
---|
6393 | case OP_EXACT1_IC:
|
---|
6394 | len = enclen(enc, bp);
|
---|
6395 | p_string(f, len, bp);
|
---|
6396 | bp += len;
|
---|
6397 | break;
|
---|
6398 | case OP_EXACTN_IC:
|
---|
6399 | GET_LENGTH_INC(len, bp);
|
---|
6400 | p_len_string(f, len, 1, bp);
|
---|
6401 | bp += len;
|
---|
6402 | break;
|
---|
6403 |
|
---|
6404 | case OP_CCLASS:
|
---|
6405 | n = bitset_on_num((BitSetRef )bp);
|
---|
6406 | bp += SIZE_BITSET;
|
---|
6407 | fprintf(f, ":%d", n);
|
---|
6408 | break;
|
---|
6409 |
|
---|
6410 | case OP_CCLASS_NOT:
|
---|
6411 | n = bitset_on_num((BitSetRef )bp);
|
---|
6412 | bp += SIZE_BITSET;
|
---|
6413 | fprintf(f, ":%d", n);
|
---|
6414 | break;
|
---|
6415 |
|
---|
6416 | case OP_CCLASS_MB:
|
---|
6417 | case OP_CCLASS_MB_NOT:
|
---|
6418 | GET_LENGTH_INC(len, bp);
|
---|
6419 | q = bp;
|
---|
6420 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
6421 | ALIGNMENT_RIGHT(q);
|
---|
6422 | #endif
|
---|
6423 | GET_CODE_POINT(code, q);
|
---|
6424 | bp += len;
|
---|
6425 | fprintf(f, ":%d:%d", (int )code, len);
|
---|
6426 | break;
|
---|
6427 |
|
---|
6428 | case OP_CCLASS_MIX:
|
---|
6429 | case OP_CCLASS_MIX_NOT:
|
---|
6430 | n = bitset_on_num((BitSetRef )bp);
|
---|
6431 | bp += SIZE_BITSET;
|
---|
6432 | GET_LENGTH_INC(len, bp);
|
---|
6433 | q = bp;
|
---|
6434 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
6435 | ALIGNMENT_RIGHT(q);
|
---|
6436 | #endif
|
---|
6437 | GET_CODE_POINT(code, q);
|
---|
6438 | bp += len;
|
---|
6439 | fprintf(f, ":%d:%d:%d", n, (int )code, len);
|
---|
6440 | break;
|
---|
6441 |
|
---|
6442 | case OP_CCLASS_NODE:
|
---|
6443 | {
|
---|
6444 | CClassNode *cc;
|
---|
6445 |
|
---|
6446 | GET_POINTER_INC(cc, bp);
|
---|
6447 | n = bitset_on_num(cc->bs);
|
---|
6448 | fprintf(f, ":%"PRIuPTR":%d", (uintptr_t )cc, n);
|
---|
6449 | }
|
---|
6450 | break;
|
---|
6451 |
|
---|
6452 | case OP_BACKREFN_IC:
|
---|
6453 | mem = *((MemNumType* )bp);
|
---|
6454 | bp += SIZE_MEMNUM;
|
---|
6455 | fprintf(f, ":%d", mem);
|
---|
6456 | break;
|
---|
6457 |
|
---|
6458 | case OP_BACKREF_MULTI_IC:
|
---|
6459 | case OP_BACKREF_MULTI:
|
---|
6460 | fputs(" ", f);
|
---|
6461 | GET_LENGTH_INC(len, bp);
|
---|
6462 | for (i = 0; i < len; i++) {
|
---|
6463 | GET_MEMNUM_INC(mem, bp);
|
---|
6464 | if (i > 0) fputs(", ", f);
|
---|
6465 | fprintf(f, "%d", mem);
|
---|
6466 | }
|
---|
6467 | break;
|
---|
6468 |
|
---|
6469 | case OP_BACKREF_WITH_LEVEL:
|
---|
6470 | {
|
---|
6471 | OnigOptionType option;
|
---|
6472 | LengthType level;
|
---|
6473 |
|
---|
6474 | GET_OPTION_INC(option, bp);
|
---|
6475 | fprintf(f, ":%d", option);
|
---|
6476 | GET_LENGTH_INC(level, bp);
|
---|
6477 | fprintf(f, ":%d", level);
|
---|
6478 |
|
---|
6479 | fputs(" ", f);
|
---|
6480 | GET_LENGTH_INC(len, bp);
|
---|
6481 | for (i = 0; i < len; i++) {
|
---|
6482 | GET_MEMNUM_INC(mem, bp);
|
---|
6483 | if (i > 0) fputs(", ", f);
|
---|
6484 | fprintf(f, "%d", mem);
|
---|
6485 | }
|
---|
6486 | }
|
---|
6487 | break;
|
---|
6488 |
|
---|
6489 | case OP_REPEAT:
|
---|
6490 | case OP_REPEAT_NG:
|
---|
6491 | {
|
---|
6492 | mem = *((MemNumType* )bp);
|
---|
6493 | bp += SIZE_MEMNUM;
|
---|
6494 | addr = *((RelAddrType* )bp);
|
---|
6495 | bp += SIZE_RELADDR;
|
---|
6496 | fprintf(f, ":%d:%d", mem, addr);
|
---|
6497 | }
|
---|
6498 | break;
|
---|
6499 |
|
---|
6500 | case OP_PUSH_OR_JUMP_EXACT1:
|
---|
6501 | case OP_PUSH_IF_PEEK_NEXT:
|
---|
6502 | addr = *((RelAddrType* )bp);
|
---|
6503 | bp += SIZE_RELADDR;
|
---|
6504 | fprintf(f, ":(%d)", addr);
|
---|
6505 | p_string(f, 1, bp);
|
---|
6506 | bp += 1;
|
---|
6507 | break;
|
---|
6508 |
|
---|
6509 | case OP_LOOK_BEHIND:
|
---|
6510 | GET_LENGTH_INC(len, bp);
|
---|
6511 | fprintf(f, ":%d", len);
|
---|
6512 | break;
|
---|
6513 |
|
---|
6514 | case OP_PUSH_LOOK_BEHIND_NOT:
|
---|
6515 | GET_RELADDR_INC(addr, bp);
|
---|
6516 | GET_LENGTH_INC(len, bp);
|
---|
6517 | fprintf(f, ":%d:(%d)", len, addr);
|
---|
6518 | break;
|
---|
6519 |
|
---|
6520 | case OP_STATE_CHECK_PUSH:
|
---|
6521 | case OP_STATE_CHECK_PUSH_OR_JUMP:
|
---|
6522 | scn = *((StateCheckNumType* )bp);
|
---|
6523 | bp += SIZE_STATE_CHECK_NUM;
|
---|
6524 | addr = *((RelAddrType* )bp);
|
---|
6525 | bp += SIZE_RELADDR;
|
---|
6526 | fprintf(f, ":%d:(%d)", scn, addr);
|
---|
6527 | break;
|
---|
6528 |
|
---|
6529 | case OP_CONDITION:
|
---|
6530 | GET_MEMNUM_INC(mem, bp);
|
---|
6531 | GET_RELADDR_INC(addr, bp);
|
---|
6532 | fprintf(f, ":%d:(%d)", mem, addr);
|
---|
6533 | break;
|
---|
6534 |
|
---|
6535 | default:
|
---|
6536 | fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
|
---|
6537 | *--bp);
|
---|
6538 | }
|
---|
6539 | }
|
---|
6540 | fputs("]", f);
|
---|
6541 | if (nextp) *nextp = bp;
|
---|
6542 | }
|
---|
6543 |
|
---|
6544 | #ifdef ONIG_DEBUG_COMPILE
|
---|
6545 | static void
|
---|
6546 | print_compiled_byte_code_list(FILE* f, regex_t* reg)
|
---|
6547 | {
|
---|
6548 | int ncode;
|
---|
6549 | UChar* bp = reg->p;
|
---|
6550 | UChar* end = reg->p + reg->used;
|
---|
6551 |
|
---|
6552 | fprintf(f, "code length: %d", reg->used);
|
---|
6553 |
|
---|
6554 | ncode = -1;
|
---|
6555 | while (bp < end) {
|
---|
6556 | ncode++;
|
---|
6557 | if (ncode % 5 == 0)
|
---|
6558 | fprintf(f, "\n%ld:", bp - reg->p);
|
---|
6559 | else
|
---|
6560 | fprintf(f, " %ld:", bp - reg->p);
|
---|
6561 | onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
|
---|
6562 | }
|
---|
6563 |
|
---|
6564 | fprintf(f, "\n");
|
---|
6565 | }
|
---|
6566 | #endif /* ONIG_DEBUG_COMPILE */
|
---|
6567 |
|
---|
6568 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
6569 | static void
|
---|
6570 | print_indent_tree(FILE* f, Node* node, int indent)
|
---|
6571 | {
|
---|
6572 | int i, type, container_p = 0;
|
---|
6573 | int add = 3;
|
---|
6574 | UChar* p;
|
---|
6575 |
|
---|
6576 | Indent(f, indent);
|
---|
6577 | if (IS_NULL(node)) {
|
---|
6578 | fprintf(f, "ERROR: null node!!!\n");
|
---|
6579 | exit (0);
|
---|
6580 | }
|
---|
6581 |
|
---|
6582 | type = NTYPE(node);
|
---|
6583 | switch (type) {
|
---|
6584 | case NT_LIST:
|
---|
6585 | case NT_ALT:
|
---|
6586 | if (NTYPE(node) == NT_LIST)
|
---|
6587 | fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
|
---|
6588 | else
|
---|
6589 | fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
|
---|
6590 |
|
---|
6591 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
6592 | while (IS_NOT_NULL(node = NCDR(node))) {
|
---|
6593 | if (NTYPE(node) != type) {
|
---|
6594 | fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
|
---|
6595 | exit(0);
|
---|
6596 | }
|
---|
6597 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
6598 | }
|
---|
6599 | break;
|
---|
6600 |
|
---|
6601 | case NT_STR:
|
---|
6602 | fprintf(f, "<string%s:%"PRIxPTR">",
|
---|
6603 | (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
|
---|
6604 | for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
|
---|
6605 | if (*p >= 0x20 && *p < 0x7f)
|
---|
6606 | fputc(*p, f);
|
---|
6607 | else {
|
---|
6608 | fprintf(f, " 0x%02x", *p);
|
---|
6609 | }
|
---|
6610 | }
|
---|
6611 | break;
|
---|
6612 |
|
---|
6613 | case NT_CCLASS:
|
---|
6614 | fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
|
---|
6615 | if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
|
---|
6616 | if (NCCLASS(node)->mbuf) {
|
---|
6617 | BBuf* bbuf = NCCLASS(node)->mbuf;
|
---|
6618 | for (i = 0; i < (int )bbuf->used; i++) {
|
---|
6619 | if (i > 0) fprintf(f, ",");
|
---|
6620 | fprintf(f, "%0x", bbuf->p[i]);
|
---|
6621 | }
|
---|
6622 | }
|
---|
6623 | break;
|
---|
6624 |
|
---|
6625 | case NT_CTYPE:
|
---|
6626 | fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
|
---|
6627 | switch (NCTYPE(node)->ctype) {
|
---|
6628 | case ONIGENC_CTYPE_WORD:
|
---|
6629 | if (NCTYPE(node)->not != 0)
|
---|
6630 | fputs("not word", f);
|
---|
6631 | else
|
---|
6632 | fputs("word", f);
|
---|
6633 | break;
|
---|
6634 |
|
---|
6635 | default:
|
---|
6636 | fprintf(f, "ERROR: undefined ctype.\n");
|
---|
6637 | exit(0);
|
---|
6638 | }
|
---|
6639 | break;
|
---|
6640 |
|
---|
6641 | case NT_CANY:
|
---|
6642 | fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
|
---|
6643 | break;
|
---|
6644 |
|
---|
6645 | case NT_ANCHOR:
|
---|
6646 | fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
|
---|
6647 | switch (NANCHOR(node)->type) {
|
---|
6648 | case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
|
---|
6649 | case ANCHOR_END_BUF: fputs("end buf", f); break;
|
---|
6650 | case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
|
---|
6651 | case ANCHOR_END_LINE: fputs("end line", f); break;
|
---|
6652 | case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
|
---|
6653 | case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
|
---|
6654 | case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
|
---|
6655 |
|
---|
6656 | case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
|
---|
6657 | case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
|
---|
6658 | #ifdef USE_WORD_BEGIN_END
|
---|
6659 | case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
|
---|
6660 | case ANCHOR_WORD_END: fputs("word end", f); break;
|
---|
6661 | #endif
|
---|
6662 | case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
|
---|
6663 | case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
|
---|
6664 | case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
|
---|
6665 | case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
|
---|
6666 | case ANCHOR_KEEP: fputs("keep",f); break;
|
---|
6667 |
|
---|
6668 | default:
|
---|
6669 | fprintf(f, "ERROR: undefined anchor type.\n");
|
---|
6670 | break;
|
---|
6671 | }
|
---|
6672 | break;
|
---|
6673 |
|
---|
6674 | case NT_BREF:
|
---|
6675 | {
|
---|
6676 | int* p;
|
---|
6677 | BRefNode* br = NBREF(node);
|
---|
6678 | p = BACKREFS_P(br);
|
---|
6679 | fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
|
---|
6680 | for (i = 0; i < br->back_num; i++) {
|
---|
6681 | if (i > 0) fputs(", ", f);
|
---|
6682 | fprintf(f, "%d", p[i]);
|
---|
6683 | }
|
---|
6684 | }
|
---|
6685 | break;
|
---|
6686 |
|
---|
6687 | #ifdef USE_SUBEXP_CALL
|
---|
6688 | case NT_CALL:
|
---|
6689 | {
|
---|
6690 | CallNode* cn = NCALL(node);
|
---|
6691 | fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
|
---|
6692 | p_string(f, cn->name_end - cn->name, cn->name);
|
---|
6693 | }
|
---|
6694 | break;
|
---|
6695 | #endif
|
---|
6696 |
|
---|
6697 | case NT_QTFR:
|
---|
6698 | fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
|
---|
6699 | NQTFR(node)->lower, NQTFR(node)->upper,
|
---|
6700 | (NQTFR(node)->greedy ? "" : "?"));
|
---|
6701 | print_indent_tree(f, NQTFR(node)->target, indent + add);
|
---|
6702 | break;
|
---|
6703 |
|
---|
6704 | case NT_ENCLOSE:
|
---|
6705 | fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
|
---|
6706 | switch (NENCLOSE(node)->type) {
|
---|
6707 | case ENCLOSE_OPTION:
|
---|
6708 | fprintf(f, "option:%d", NENCLOSE(node)->option);
|
---|
6709 | break;
|
---|
6710 | case ENCLOSE_MEMORY:
|
---|
6711 | fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
|
---|
6712 | break;
|
---|
6713 | case ENCLOSE_STOP_BACKTRACK:
|
---|
6714 | fprintf(f, "stop-bt");
|
---|
6715 | break;
|
---|
6716 | case ENCLOSE_CONDITION:
|
---|
6717 | fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
|
---|
6718 | break;
|
---|
6719 |
|
---|
6720 | default:
|
---|
6721 | break;
|
---|
6722 | }
|
---|
6723 | fprintf(f, "\n");
|
---|
6724 | print_indent_tree(f, NENCLOSE(node)->target, indent + add);
|
---|
6725 | break;
|
---|
6726 |
|
---|
6727 | default:
|
---|
6728 | fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
|
---|
6729 | break;
|
---|
6730 | }
|
---|
6731 |
|
---|
6732 | if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
|
---|
6733 | type != NT_ENCLOSE)
|
---|
6734 | fprintf(f, "\n");
|
---|
6735 |
|
---|
6736 | if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
|
---|
6737 |
|
---|
6738 | fflush(f);
|
---|
6739 | }
|
---|
6740 |
|
---|
6741 | static void
|
---|
6742 | print_tree(FILE* f, Node* node)
|
---|
6743 | {
|
---|
6744 | print_indent_tree(f, node, 0);
|
---|
6745 | }
|
---|
6746 | #endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
6747 | #endif /* ONIG_DEBUG */
|
---|