[279] | 1 | /**********************************************************************
|
---|
| 2 | regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
|
---|
| 3 | **********************************************************************/
|
---|
| 4 | /*-
|
---|
| 5 | * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
---|
[331] | 6 | * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
|
---|
[279] | 7 | * All rights reserved.
|
---|
| 8 | *
|
---|
| 9 | * Redistribution and use in source and binary forms, with or without
|
---|
| 10 | * modification, are permitted provided that the following conditions
|
---|
| 11 | * are met:
|
---|
| 12 | * 1. Redistributions of source code must retain the above copyright
|
---|
| 13 | * notice, this list of conditions and the following disclaimer.
|
---|
| 14 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
| 15 | * notice, this list of conditions and the following disclaimer in the
|
---|
| 16 | * documentation and/or other materials provided with the distribution.
|
---|
| 17 | *
|
---|
| 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
---|
| 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
| 20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
---|
| 21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
---|
| 22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
---|
| 23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
---|
| 24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
---|
| 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
---|
| 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
---|
| 27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
| 28 | * SUCH DAMAGE.
|
---|
| 29 | */
|
---|
| 30 |
|
---|
| 31 | #include "regparse.h"
|
---|
| 32 |
|
---|
| 33 | OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
|
---|
| 34 |
|
---|
| 35 | extern OnigCaseFoldType
|
---|
| 36 | onig_get_default_case_fold_flag(void)
|
---|
| 37 | {
|
---|
| 38 | return OnigDefaultCaseFoldFlag;
|
---|
| 39 | }
|
---|
| 40 |
|
---|
| 41 | extern int
|
---|
| 42 | onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
|
---|
| 43 | {
|
---|
| 44 | OnigDefaultCaseFoldFlag = case_fold_flag;
|
---|
| 45 | return 0;
|
---|
| 46 | }
|
---|
| 47 |
|
---|
| 48 |
|
---|
| 49 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 50 | static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
---|
| 51 | #endif
|
---|
| 52 |
|
---|
| 53 | #if 0
|
---|
| 54 | static UChar*
|
---|
| 55 | str_dup(UChar* s, UChar* end)
|
---|
| 56 | {
|
---|
| 57 | ptrdiff_t len = end - s;
|
---|
| 58 |
|
---|
| 59 | if (len > 0) {
|
---|
| 60 | UChar* r = (UChar* )xmalloc(len + 1);
|
---|
| 61 | CHECK_NULL_RETURN(r);
|
---|
| 62 | xmemcpy(r, s, len);
|
---|
| 63 | r[len] = (UChar )0;
|
---|
| 64 | return r;
|
---|
| 65 | }
|
---|
| 66 | else return NULL;
|
---|
| 67 | }
|
---|
| 68 | #endif
|
---|
| 69 |
|
---|
| 70 | static void
|
---|
| 71 | swap_node(Node* a, Node* b)
|
---|
| 72 | {
|
---|
| 73 | Node c;
|
---|
| 74 | c = *a; *a = *b; *b = c;
|
---|
| 75 |
|
---|
| 76 | if (NTYPE(a) == NT_STR) {
|
---|
| 77 | StrNode* sn = NSTR(a);
|
---|
| 78 | if (sn->capa == 0) {
|
---|
| 79 | size_t len = sn->end - sn->s;
|
---|
| 80 | sn->s = sn->buf;
|
---|
| 81 | sn->end = sn->s + len;
|
---|
| 82 | }
|
---|
| 83 | }
|
---|
| 84 |
|
---|
| 85 | if (NTYPE(b) == NT_STR) {
|
---|
| 86 | StrNode* sn = NSTR(b);
|
---|
| 87 | if (sn->capa == 0) {
|
---|
| 88 | size_t len = sn->end - sn->s;
|
---|
| 89 | sn->s = sn->buf;
|
---|
| 90 | sn->end = sn->s + len;
|
---|
| 91 | }
|
---|
| 92 | }
|
---|
| 93 | }
|
---|
| 94 |
|
---|
| 95 | static OnigDistance
|
---|
| 96 | distance_add(OnigDistance d1, OnigDistance d2)
|
---|
| 97 | {
|
---|
| 98 | if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
|
---|
| 99 | return ONIG_INFINITE_DISTANCE;
|
---|
| 100 | else {
|
---|
| 101 | if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
|
---|
| 102 | else return ONIG_INFINITE_DISTANCE;
|
---|
| 103 | }
|
---|
| 104 | }
|
---|
| 105 |
|
---|
| 106 | static OnigDistance
|
---|
| 107 | distance_multiply(OnigDistance d, int m)
|
---|
| 108 | {
|
---|
| 109 | if (m == 0) return 0;
|
---|
| 110 |
|
---|
| 111 | if (d < ONIG_INFINITE_DISTANCE / m)
|
---|
| 112 | return d * m;
|
---|
| 113 | else
|
---|
| 114 | return ONIG_INFINITE_DISTANCE;
|
---|
| 115 | }
|
---|
| 116 |
|
---|
| 117 | static int
|
---|
| 118 | bitset_is_empty(BitSetRef bs)
|
---|
| 119 | {
|
---|
| 120 | int i;
|
---|
| 121 | for (i = 0; i < BITSET_SIZE; i++) {
|
---|
| 122 | if (bs[i] != 0) return 0;
|
---|
| 123 | }
|
---|
| 124 | return 1;
|
---|
| 125 | }
|
---|
| 126 |
|
---|
| 127 | #ifdef ONIG_DEBUG
|
---|
| 128 | static int
|
---|
| 129 | bitset_on_num(BitSetRef bs)
|
---|
| 130 | {
|
---|
| 131 | int i, n;
|
---|
| 132 |
|
---|
| 133 | n = 0;
|
---|
| 134 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 135 | if (BITSET_AT(bs, i)) n++;
|
---|
| 136 | }
|
---|
| 137 | return n;
|
---|
| 138 | }
|
---|
| 139 | #endif
|
---|
| 140 |
|
---|
| 141 | extern int
|
---|
| 142 | onig_bbuf_init(BBuf* buf, OnigDistance size)
|
---|
| 143 | {
|
---|
| 144 | if (size <= 0) {
|
---|
| 145 | size = 0;
|
---|
| 146 | buf->p = NULL;
|
---|
| 147 | }
|
---|
| 148 | else {
|
---|
| 149 | buf->p = (UChar* )xmalloc(size);
|
---|
| 150 | if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
|
---|
| 151 | }
|
---|
| 152 |
|
---|
| 153 | buf->alloc = (unsigned int )size;
|
---|
| 154 | buf->used = 0;
|
---|
| 155 | return 0;
|
---|
| 156 | }
|
---|
| 157 |
|
---|
| 158 |
|
---|
| 159 | #ifdef USE_SUBEXP_CALL
|
---|
| 160 |
|
---|
| 161 | static int
|
---|
| 162 | unset_addr_list_init(UnsetAddrList* uslist, int size)
|
---|
| 163 | {
|
---|
| 164 | UnsetAddr* p;
|
---|
| 165 |
|
---|
| 166 | p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
|
---|
| 167 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 168 | uslist->num = 0;
|
---|
| 169 | uslist->alloc = size;
|
---|
| 170 | uslist->us = p;
|
---|
| 171 | return 0;
|
---|
| 172 | }
|
---|
| 173 |
|
---|
| 174 | static void
|
---|
| 175 | unset_addr_list_end(UnsetAddrList* uslist)
|
---|
| 176 | {
|
---|
| 177 | if (IS_NOT_NULL(uslist->us))
|
---|
| 178 | xfree(uslist->us);
|
---|
| 179 | }
|
---|
| 180 |
|
---|
| 181 | static int
|
---|
| 182 | unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
|
---|
| 183 | {
|
---|
| 184 | UnsetAddr* p;
|
---|
| 185 | int size;
|
---|
| 186 |
|
---|
| 187 | if (uslist->num >= uslist->alloc) {
|
---|
| 188 | size = uslist->alloc * 2;
|
---|
| 189 | p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
|
---|
| 190 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 191 | uslist->alloc = size;
|
---|
| 192 | uslist->us = p;
|
---|
| 193 | }
|
---|
| 194 |
|
---|
| 195 | uslist->us[uslist->num].offset = offset;
|
---|
| 196 | uslist->us[uslist->num].target = node;
|
---|
| 197 | uslist->num++;
|
---|
| 198 | return 0;
|
---|
| 199 | }
|
---|
| 200 | #endif /* USE_SUBEXP_CALL */
|
---|
| 201 |
|
---|
| 202 |
|
---|
| 203 | static int
|
---|
| 204 | add_opcode(regex_t* reg, int opcode)
|
---|
| 205 | {
|
---|
| 206 | BBUF_ADD1(reg, opcode);
|
---|
| 207 | return 0;
|
---|
| 208 | }
|
---|
| 209 |
|
---|
| 210 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 211 | static int
|
---|
| 212 | add_state_check_num(regex_t* reg, int num)
|
---|
| 213 | {
|
---|
| 214 | StateCheckNumType n = (StateCheckNumType )num;
|
---|
| 215 |
|
---|
| 216 | BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
|
---|
| 217 | return 0;
|
---|
| 218 | }
|
---|
| 219 | #endif
|
---|
| 220 |
|
---|
| 221 | static int
|
---|
| 222 | add_rel_addr(regex_t* reg, int addr)
|
---|
| 223 | {
|
---|
| 224 | RelAddrType ra = (RelAddrType )addr;
|
---|
| 225 |
|
---|
| 226 | BBUF_ADD(reg, &ra, SIZE_RELADDR);
|
---|
| 227 | return 0;
|
---|
| 228 | }
|
---|
| 229 |
|
---|
| 230 | static int
|
---|
| 231 | add_abs_addr(regex_t* reg, int addr)
|
---|
| 232 | {
|
---|
| 233 | AbsAddrType ra = (AbsAddrType )addr;
|
---|
| 234 |
|
---|
| 235 | BBUF_ADD(reg, &ra, SIZE_ABSADDR);
|
---|
| 236 | return 0;
|
---|
| 237 | }
|
---|
| 238 |
|
---|
| 239 | static int
|
---|
| 240 | add_length(regex_t* reg, OnigDistance len)
|
---|
| 241 | {
|
---|
| 242 | LengthType l = (LengthType )len;
|
---|
| 243 |
|
---|
| 244 | BBUF_ADD(reg, &l, SIZE_LENGTH);
|
---|
| 245 | return 0;
|
---|
| 246 | }
|
---|
| 247 |
|
---|
| 248 | static int
|
---|
| 249 | add_mem_num(regex_t* reg, int num)
|
---|
| 250 | {
|
---|
| 251 | MemNumType n = (MemNumType )num;
|
---|
| 252 |
|
---|
| 253 | BBUF_ADD(reg, &n, SIZE_MEMNUM);
|
---|
| 254 | return 0;
|
---|
| 255 | }
|
---|
| 256 |
|
---|
[331] | 257 | #if 0
|
---|
[279] | 258 | static int
|
---|
| 259 | add_pointer(regex_t* reg, void* addr)
|
---|
| 260 | {
|
---|
| 261 | PointerType ptr = (PointerType )addr;
|
---|
| 262 |
|
---|
| 263 | BBUF_ADD(reg, &ptr, SIZE_POINTER);
|
---|
| 264 | return 0;
|
---|
| 265 | }
|
---|
[331] | 266 | #endif
|
---|
[279] | 267 |
|
---|
| 268 | static int
|
---|
| 269 | add_option(regex_t* reg, OnigOptionType option)
|
---|
| 270 | {
|
---|
| 271 | BBUF_ADD(reg, &option, SIZE_OPTION);
|
---|
| 272 | return 0;
|
---|
| 273 | }
|
---|
| 274 |
|
---|
| 275 | static int
|
---|
| 276 | add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
|
---|
| 277 | {
|
---|
| 278 | int r;
|
---|
| 279 |
|
---|
| 280 | r = add_opcode(reg, opcode);
|
---|
| 281 | if (r) return r;
|
---|
| 282 | r = add_rel_addr(reg, addr);
|
---|
| 283 | return r;
|
---|
| 284 | }
|
---|
| 285 |
|
---|
| 286 | static int
|
---|
| 287 | add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
|
---|
| 288 | {
|
---|
| 289 | BBUF_ADD(reg, bytes, len);
|
---|
| 290 | return 0;
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | static int
|
---|
| 294 | add_bitset(regex_t* reg, BitSetRef bs)
|
---|
| 295 | {
|
---|
| 296 | BBUF_ADD(reg, bs, SIZE_BITSET);
|
---|
| 297 | return 0;
|
---|
| 298 | }
|
---|
| 299 |
|
---|
| 300 | static int
|
---|
| 301 | add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
|
---|
| 302 | {
|
---|
| 303 | int r;
|
---|
| 304 |
|
---|
| 305 | r = add_opcode(reg, opcode);
|
---|
| 306 | if (r) return r;
|
---|
| 307 | r = add_option(reg, option);
|
---|
| 308 | return r;
|
---|
| 309 | }
|
---|
| 310 |
|
---|
| 311 | static int compile_length_tree(Node* node, regex_t* reg);
|
---|
| 312 | static int compile_tree(Node* node, regex_t* reg);
|
---|
| 313 |
|
---|
| 314 |
|
---|
| 315 | #define IS_NEED_STR_LEN_OP_EXACT(op) \
|
---|
| 316 | ((op) == OP_EXACTN || (op) == OP_EXACTMB2N ||\
|
---|
| 317 | (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN || (op) == OP_EXACTN_IC)
|
---|
| 318 |
|
---|
| 319 | static int
|
---|
| 320 | select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
|
---|
| 321 | {
|
---|
| 322 | int op;
|
---|
| 323 | OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
|
---|
| 324 |
|
---|
| 325 | if (ignore_case) {
|
---|
| 326 | switch (str_len) {
|
---|
| 327 | case 1: op = OP_EXACT1_IC; break;
|
---|
| 328 | default: op = OP_EXACTN_IC; break;
|
---|
| 329 | }
|
---|
| 330 | }
|
---|
| 331 | else {
|
---|
| 332 | switch (mb_len) {
|
---|
| 333 | case 1:
|
---|
| 334 | switch (str_len) {
|
---|
| 335 | case 1: op = OP_EXACT1; break;
|
---|
| 336 | case 2: op = OP_EXACT2; break;
|
---|
| 337 | case 3: op = OP_EXACT3; break;
|
---|
| 338 | case 4: op = OP_EXACT4; break;
|
---|
| 339 | case 5: op = OP_EXACT5; break;
|
---|
| 340 | default: op = OP_EXACTN; break;
|
---|
| 341 | }
|
---|
| 342 | break;
|
---|
| 343 |
|
---|
| 344 | case 2:
|
---|
| 345 | switch (str_len) {
|
---|
| 346 | case 1: op = OP_EXACTMB2N1; break;
|
---|
| 347 | case 2: op = OP_EXACTMB2N2; break;
|
---|
| 348 | case 3: op = OP_EXACTMB2N3; break;
|
---|
| 349 | default: op = OP_EXACTMB2N; break;
|
---|
| 350 | }
|
---|
| 351 | break;
|
---|
| 352 |
|
---|
| 353 | case 3:
|
---|
| 354 | op = OP_EXACTMB3N;
|
---|
| 355 | break;
|
---|
| 356 |
|
---|
| 357 | default:
|
---|
| 358 | op = OP_EXACTMBN;
|
---|
| 359 | break;
|
---|
| 360 | }
|
---|
| 361 | }
|
---|
| 362 | return op;
|
---|
| 363 | }
|
---|
| 364 |
|
---|
| 365 | static int
|
---|
| 366 | compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
|
---|
| 367 | {
|
---|
| 368 | int r;
|
---|
| 369 | int saved_num_null_check = reg->num_null_check;
|
---|
| 370 |
|
---|
| 371 | if (empty_info != 0) {
|
---|
| 372 | r = add_opcode(reg, OP_NULL_CHECK_START);
|
---|
| 373 | if (r) return r;
|
---|
| 374 | r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
|
---|
| 375 | if (r) return r;
|
---|
| 376 | reg->num_null_check++;
|
---|
| 377 | }
|
---|
| 378 |
|
---|
| 379 | r = compile_tree(node, reg);
|
---|
| 380 | if (r) return r;
|
---|
| 381 |
|
---|
| 382 | if (empty_info != 0) {
|
---|
| 383 | if (empty_info == NQ_TARGET_IS_EMPTY)
|
---|
| 384 | r = add_opcode(reg, OP_NULL_CHECK_END);
|
---|
| 385 | else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
|
---|
| 386 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
|
---|
| 387 | else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
|
---|
| 388 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
|
---|
| 389 |
|
---|
| 390 | if (r) return r;
|
---|
| 391 | r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
|
---|
| 392 | }
|
---|
| 393 | return r;
|
---|
| 394 | }
|
---|
| 395 |
|
---|
| 396 | #ifdef USE_SUBEXP_CALL
|
---|
| 397 | static int
|
---|
| 398 | compile_call(CallNode* node, regex_t* reg)
|
---|
| 399 | {
|
---|
| 400 | int r;
|
---|
| 401 |
|
---|
| 402 | r = add_opcode(reg, OP_CALL);
|
---|
| 403 | if (r) return r;
|
---|
| 404 | r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
|
---|
| 405 | node->target);
|
---|
| 406 | if (r) return r;
|
---|
| 407 | r = add_abs_addr(reg, 0 /*dummy addr.*/);
|
---|
| 408 | return r;
|
---|
| 409 | }
|
---|
| 410 | #endif
|
---|
| 411 |
|
---|
| 412 | static int
|
---|
| 413 | compile_tree_n_times(Node* node, int n, regex_t* reg)
|
---|
| 414 | {
|
---|
| 415 | int i, r;
|
---|
| 416 |
|
---|
| 417 | for (i = 0; i < n; i++) {
|
---|
| 418 | r = compile_tree(node, reg);
|
---|
| 419 | if (r) return r;
|
---|
| 420 | }
|
---|
| 421 | return 0;
|
---|
| 422 | }
|
---|
| 423 |
|
---|
| 424 | static int
|
---|
| 425 | add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
|
---|
| 426 | regex_t* reg ARG_UNUSED, int ignore_case)
|
---|
| 427 | {
|
---|
| 428 | int len;
|
---|
| 429 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
| 430 |
|
---|
| 431 | len = SIZE_OPCODE;
|
---|
| 432 |
|
---|
| 433 | if (op == OP_EXACTMBN) len += SIZE_LENGTH;
|
---|
| 434 | if (IS_NEED_STR_LEN_OP_EXACT(op))
|
---|
| 435 | len += SIZE_LENGTH;
|
---|
| 436 |
|
---|
| 437 | len += (int )byte_len;
|
---|
| 438 | return len;
|
---|
| 439 | }
|
---|
| 440 |
|
---|
| 441 | static int
|
---|
| 442 | add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
|
---|
| 443 | regex_t* reg, int ignore_case)
|
---|
| 444 | {
|
---|
| 445 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
| 446 | add_opcode(reg, op);
|
---|
| 447 |
|
---|
| 448 | if (op == OP_EXACTMBN)
|
---|
| 449 | add_length(reg, mb_len);
|
---|
| 450 |
|
---|
| 451 | if (IS_NEED_STR_LEN_OP_EXACT(op)) {
|
---|
| 452 | if (op == OP_EXACTN_IC)
|
---|
| 453 | add_length(reg, byte_len);
|
---|
| 454 | else
|
---|
| 455 | add_length(reg, byte_len / mb_len);
|
---|
| 456 | }
|
---|
| 457 |
|
---|
| 458 | add_bytes(reg, s, byte_len);
|
---|
| 459 | return 0;
|
---|
| 460 | }
|
---|
| 461 |
|
---|
| 462 |
|
---|
| 463 | static int
|
---|
| 464 | compile_length_string_node(Node* node, regex_t* reg)
|
---|
| 465 | {
|
---|
| 466 | int rlen, r, len, prev_len, blen, ambig;
|
---|
| 467 | OnigEncoding enc = reg->enc;
|
---|
| 468 | UChar *p, *prev;
|
---|
| 469 | StrNode* sn;
|
---|
| 470 |
|
---|
| 471 | sn = NSTR(node);
|
---|
| 472 | if (sn->end <= sn->s)
|
---|
| 473 | return 0;
|
---|
| 474 |
|
---|
| 475 | ambig = NSTRING_IS_AMBIG(node);
|
---|
| 476 |
|
---|
| 477 | p = prev = sn->s;
|
---|
[331] | 478 | prev_len = enclen(enc, p, sn->end);
|
---|
[279] | 479 | p += prev_len;
|
---|
| 480 | blen = prev_len;
|
---|
| 481 | rlen = 0;
|
---|
| 482 |
|
---|
| 483 | for (; p < sn->end; ) {
|
---|
[331] | 484 | len = enclen(enc, p, sn->end);
|
---|
[279] | 485 | if (len == prev_len || ambig) {
|
---|
| 486 | blen += len;
|
---|
| 487 | }
|
---|
| 488 | else {
|
---|
| 489 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
| 490 | rlen += r;
|
---|
| 491 | prev = p;
|
---|
| 492 | blen = len;
|
---|
| 493 | prev_len = len;
|
---|
| 494 | }
|
---|
| 495 | p += len;
|
---|
| 496 | }
|
---|
| 497 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
| 498 | rlen += r;
|
---|
| 499 | return rlen;
|
---|
| 500 | }
|
---|
| 501 |
|
---|
| 502 | static int
|
---|
| 503 | compile_length_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
| 504 | {
|
---|
| 505 | if (sn->end <= sn->s)
|
---|
| 506 | return 0;
|
---|
| 507 |
|
---|
| 508 | return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
| 509 | }
|
---|
| 510 |
|
---|
| 511 | static int
|
---|
| 512 | compile_string_node(Node* node, regex_t* reg)
|
---|
| 513 | {
|
---|
| 514 | int r, len, prev_len, blen, ambig;
|
---|
| 515 | OnigEncoding enc = reg->enc;
|
---|
| 516 | UChar *p, *prev, *end;
|
---|
| 517 | StrNode* sn;
|
---|
| 518 |
|
---|
| 519 | sn = NSTR(node);
|
---|
| 520 | if (sn->end <= sn->s)
|
---|
| 521 | return 0;
|
---|
| 522 |
|
---|
| 523 | end = sn->end;
|
---|
| 524 | ambig = NSTRING_IS_AMBIG(node);
|
---|
| 525 |
|
---|
| 526 | p = prev = sn->s;
|
---|
[331] | 527 | prev_len = enclen(enc, p, end);
|
---|
[279] | 528 | p += prev_len;
|
---|
| 529 | blen = prev_len;
|
---|
| 530 |
|
---|
| 531 | for (; p < end; ) {
|
---|
[331] | 532 | len = enclen(enc, p, end);
|
---|
[279] | 533 | if (len == prev_len || ambig) {
|
---|
| 534 | blen += len;
|
---|
| 535 | }
|
---|
| 536 | else {
|
---|
| 537 | r = add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
| 538 | if (r) return r;
|
---|
| 539 |
|
---|
| 540 | prev = p;
|
---|
| 541 | blen = len;
|
---|
| 542 | prev_len = len;
|
---|
| 543 | }
|
---|
| 544 |
|
---|
| 545 | p += len;
|
---|
| 546 | }
|
---|
| 547 | return add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
| 548 | }
|
---|
| 549 |
|
---|
| 550 | static int
|
---|
| 551 | compile_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
| 552 | {
|
---|
| 553 | if (sn->end <= sn->s)
|
---|
| 554 | return 0;
|
---|
| 555 |
|
---|
| 556 | return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
| 557 | }
|
---|
| 558 |
|
---|
| 559 | static int
|
---|
| 560 | add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
|
---|
| 561 | {
|
---|
| 562 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 563 | add_length(reg, mbuf->used);
|
---|
| 564 | return add_bytes(reg, mbuf->p, mbuf->used);
|
---|
| 565 | #else
|
---|
| 566 | int r, pad_size;
|
---|
| 567 | UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
|
---|
| 568 |
|
---|
| 569 | GET_ALIGNMENT_PAD_SIZE(p, pad_size);
|
---|
| 570 | add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
|
---|
| 571 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
| 572 |
|
---|
| 573 | r = add_bytes(reg, mbuf->p, mbuf->used);
|
---|
| 574 |
|
---|
| 575 | /* padding for return value from compile_length_cclass_node() to be fix. */
|
---|
| 576 | pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
|
---|
| 577 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
| 578 | return r;
|
---|
| 579 | #endif
|
---|
| 580 | }
|
---|
| 581 |
|
---|
| 582 | static int
|
---|
| 583 | compile_length_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
| 584 | {
|
---|
| 585 | int len;
|
---|
| 586 |
|
---|
| 587 | if (IS_NULL(cc->mbuf)) {
|
---|
| 588 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
| 589 | }
|
---|
| 590 | else {
|
---|
| 591 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
| 592 | len = SIZE_OPCODE;
|
---|
| 593 | }
|
---|
| 594 | else {
|
---|
| 595 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
| 596 | }
|
---|
| 597 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 598 | len += SIZE_LENGTH + cc->mbuf->used;
|
---|
| 599 | #else
|
---|
| 600 | len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
|
---|
| 601 | #endif
|
---|
| 602 | }
|
---|
| 603 |
|
---|
| 604 | return len;
|
---|
| 605 | }
|
---|
| 606 |
|
---|
| 607 | static int
|
---|
| 608 | compile_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
| 609 | {
|
---|
| 610 | int r;
|
---|
| 611 |
|
---|
| 612 | if (IS_NULL(cc->mbuf)) {
|
---|
| 613 | if (IS_NCCLASS_NOT(cc))
|
---|
| 614 | add_opcode(reg, OP_CCLASS_NOT);
|
---|
| 615 | else
|
---|
| 616 | add_opcode(reg, OP_CCLASS);
|
---|
| 617 |
|
---|
| 618 | r = add_bitset(reg, cc->bs);
|
---|
| 619 | }
|
---|
| 620 | else {
|
---|
| 621 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
| 622 | if (IS_NCCLASS_NOT(cc))
|
---|
[331] | 623 | add_opcode(reg, OP_CCLASS_MB_NOT);
|
---|
[279] | 624 | else
|
---|
[331] | 625 | add_opcode(reg, OP_CCLASS_MB);
|
---|
[279] | 626 |
|
---|
| 627 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
| 628 | }
|
---|
| 629 | else {
|
---|
| 630 | if (IS_NCCLASS_NOT(cc))
|
---|
[331] | 631 | add_opcode(reg, OP_CCLASS_MIX_NOT);
|
---|
[279] | 632 | else
|
---|
[331] | 633 | add_opcode(reg, OP_CCLASS_MIX);
|
---|
[279] | 634 |
|
---|
| 635 | r = add_bitset(reg, cc->bs);
|
---|
| 636 | if (r) return r;
|
---|
| 637 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
| 638 | }
|
---|
| 639 | }
|
---|
| 640 |
|
---|
| 641 | return r;
|
---|
| 642 | }
|
---|
| 643 |
|
---|
| 644 | static int
|
---|
| 645 | entry_repeat_range(regex_t* reg, int id, int lower, int upper)
|
---|
| 646 | {
|
---|
| 647 | #define REPEAT_RANGE_ALLOC 4
|
---|
| 648 |
|
---|
| 649 | OnigRepeatRange* p;
|
---|
| 650 |
|
---|
| 651 | if (reg->repeat_range_alloc == 0) {
|
---|
| 652 | p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
|
---|
| 653 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 654 | reg->repeat_range = p;
|
---|
| 655 | reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
|
---|
| 656 | }
|
---|
| 657 | else if (reg->repeat_range_alloc <= id) {
|
---|
| 658 | int n;
|
---|
| 659 | n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
|
---|
| 660 | p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
|
---|
| 661 | sizeof(OnigRepeatRange) * n);
|
---|
| 662 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 663 | reg->repeat_range = p;
|
---|
| 664 | reg->repeat_range_alloc = n;
|
---|
| 665 | }
|
---|
| 666 | else {
|
---|
| 667 | p = reg->repeat_range;
|
---|
| 668 | }
|
---|
| 669 |
|
---|
| 670 | p[id].lower = lower;
|
---|
| 671 | p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
|
---|
| 672 | return 0;
|
---|
| 673 | }
|
---|
| 674 |
|
---|
| 675 | static int
|
---|
| 676 | compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
|
---|
| 677 | regex_t* reg)
|
---|
| 678 | {
|
---|
| 679 | int r;
|
---|
| 680 | int num_repeat = reg->num_repeat;
|
---|
| 681 |
|
---|
| 682 | r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
|
---|
| 683 | if (r) return r;
|
---|
| 684 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
| 685 | reg->num_repeat++;
|
---|
| 686 | if (r) return r;
|
---|
| 687 | r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
|
---|
| 688 | if (r) return r;
|
---|
| 689 |
|
---|
| 690 | r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
|
---|
| 691 | if (r) return r;
|
---|
| 692 |
|
---|
| 693 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 694 | if (r) return r;
|
---|
| 695 |
|
---|
| 696 | if (
|
---|
| 697 | #ifdef USE_SUBEXP_CALL
|
---|
| 698 | reg->num_call > 0 ||
|
---|
| 699 | #endif
|
---|
| 700 | IS_QUANTIFIER_IN_REPEAT(qn)) {
|
---|
| 701 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
|
---|
| 702 | }
|
---|
| 703 | else {
|
---|
| 704 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
|
---|
| 705 | }
|
---|
| 706 | if (r) return r;
|
---|
| 707 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
| 708 | return r;
|
---|
| 709 | }
|
---|
| 710 |
|
---|
| 711 | static int
|
---|
| 712 | is_anychar_star_quantifier(QtfrNode* qn)
|
---|
| 713 | {
|
---|
| 714 | if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
---|
| 715 | NTYPE(qn->target) == NT_CANY)
|
---|
| 716 | return 1;
|
---|
| 717 | else
|
---|
| 718 | return 0;
|
---|
| 719 | }
|
---|
| 720 |
|
---|
| 721 | #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
|
---|
| 722 | #define CKN_ON (ckn > 0)
|
---|
| 723 |
|
---|
| 724 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 725 |
|
---|
| 726 | static int
|
---|
| 727 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 728 | {
|
---|
| 729 | int len, mod_tlen, cklen;
|
---|
| 730 | int ckn;
|
---|
| 731 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 732 | int empty_info = qn->target_empty_info;
|
---|
| 733 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 734 |
|
---|
| 735 | if (tlen < 0) return tlen;
|
---|
| 736 |
|
---|
| 737 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
| 738 |
|
---|
| 739 | cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
|
---|
| 740 |
|
---|
| 741 | /* anychar repeat */
|
---|
| 742 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
| 743 | if (qn->greedy && infinite) {
|
---|
| 744 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
|
---|
[331] | 745 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
|
---|
[279] | 746 | else
|
---|
[331] | 747 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
|
---|
[279] | 748 | }
|
---|
| 749 | }
|
---|
| 750 |
|
---|
| 751 | if (empty_info != 0)
|
---|
| 752 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 753 | else
|
---|
| 754 | mod_tlen = tlen;
|
---|
| 755 |
|
---|
| 756 | if (infinite && qn->lower <= 1) {
|
---|
| 757 | if (qn->greedy) {
|
---|
| 758 | if (qn->lower == 1)
|
---|
| 759 | len = SIZE_OP_JUMP;
|
---|
| 760 | else
|
---|
| 761 | len = 0;
|
---|
| 762 |
|
---|
| 763 | len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
|
---|
| 764 | }
|
---|
| 765 | else {
|
---|
| 766 | if (qn->lower == 0)
|
---|
| 767 | len = SIZE_OP_JUMP;
|
---|
| 768 | else
|
---|
| 769 | len = 0;
|
---|
| 770 |
|
---|
| 771 | len += mod_tlen + SIZE_OP_PUSH + cklen;
|
---|
| 772 | }
|
---|
| 773 | }
|
---|
| 774 | else if (qn->upper == 0) {
|
---|
| 775 | if (qn->is_refered != 0) /* /(?<n>..){0}/ */
|
---|
| 776 | len = SIZE_OP_JUMP + tlen;
|
---|
| 777 | else
|
---|
| 778 | len = 0;
|
---|
| 779 | }
|
---|
| 780 | else if (qn->upper == 1 && qn->greedy) {
|
---|
| 781 | if (qn->lower == 0) {
|
---|
| 782 | if (CKN_ON) {
|
---|
| 783 | len = SIZE_OP_STATE_CHECK_PUSH + tlen;
|
---|
| 784 | }
|
---|
| 785 | else {
|
---|
| 786 | len = SIZE_OP_PUSH + tlen;
|
---|
| 787 | }
|
---|
| 788 | }
|
---|
| 789 | else {
|
---|
| 790 | len = tlen;
|
---|
| 791 | }
|
---|
| 792 | }
|
---|
| 793 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 794 | len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
|
---|
| 795 | }
|
---|
| 796 | else {
|
---|
| 797 | len = SIZE_OP_REPEAT_INC
|
---|
| 798 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
| 799 | if (CKN_ON)
|
---|
| 800 | len += SIZE_OP_STATE_CHECK;
|
---|
| 801 | }
|
---|
| 802 |
|
---|
| 803 | return len;
|
---|
| 804 | }
|
---|
| 805 |
|
---|
| 806 | static int
|
---|
| 807 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 808 | {
|
---|
| 809 | int r, mod_tlen;
|
---|
| 810 | int ckn;
|
---|
| 811 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 812 | int empty_info = qn->target_empty_info;
|
---|
| 813 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 814 |
|
---|
| 815 | if (tlen < 0) return tlen;
|
---|
| 816 |
|
---|
| 817 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
| 818 |
|
---|
| 819 | if (is_anychar_star_quantifier(qn)) {
|
---|
| 820 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 821 | if (r) return r;
|
---|
| 822 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
|
---|
| 823 | if (IS_MULTILINE(reg->options))
|
---|
| 824 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
| 825 | else
|
---|
| 826 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
| 827 | if (r) return r;
|
---|
| 828 | if (CKN_ON) {
|
---|
| 829 | r = add_state_check_num(reg, ckn);
|
---|
| 830 | if (r) return r;
|
---|
| 831 | }
|
---|
| 832 |
|
---|
| 833 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
| 834 | }
|
---|
| 835 | else {
|
---|
| 836 | if (IS_MULTILINE(reg->options)) {
|
---|
| 837 | r = add_opcode(reg, (CKN_ON ?
|
---|
| 838 | OP_STATE_CHECK_ANYCHAR_ML_STAR
|
---|
| 839 | : OP_ANYCHAR_ML_STAR));
|
---|
| 840 | }
|
---|
| 841 | else {
|
---|
| 842 | r = add_opcode(reg, (CKN_ON ?
|
---|
| 843 | OP_STATE_CHECK_ANYCHAR_STAR
|
---|
| 844 | : OP_ANYCHAR_STAR));
|
---|
| 845 | }
|
---|
| 846 | if (r) return r;
|
---|
| 847 | if (CKN_ON)
|
---|
| 848 | r = add_state_check_num(reg, ckn);
|
---|
| 849 |
|
---|
| 850 | return r;
|
---|
| 851 | }
|
---|
| 852 | }
|
---|
| 853 |
|
---|
| 854 | if (empty_info != 0)
|
---|
| 855 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 856 | else
|
---|
| 857 | mod_tlen = tlen;
|
---|
| 858 |
|
---|
| 859 | if (infinite && qn->lower <= 1) {
|
---|
| 860 | if (qn->greedy) {
|
---|
| 861 | if (qn->lower == 1) {
|
---|
| 862 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 863 | (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
|
---|
| 864 | if (r) return r;
|
---|
| 865 | }
|
---|
| 866 |
|
---|
| 867 | if (CKN_ON) {
|
---|
| 868 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
| 869 | if (r) return r;
|
---|
| 870 | r = add_state_check_num(reg, ckn);
|
---|
| 871 | if (r) return r;
|
---|
| 872 | r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
|
---|
| 873 | }
|
---|
| 874 | else {
|
---|
| 875 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
| 876 | }
|
---|
| 877 | if (r) return r;
|
---|
| 878 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 879 | if (r) return r;
|
---|
| 880 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 881 | -(mod_tlen + (int )SIZE_OP_JUMP
|
---|
| 882 | + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
|
---|
| 883 | }
|
---|
| 884 | else {
|
---|
| 885 | if (qn->lower == 0) {
|
---|
| 886 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
| 887 | if (r) return r;
|
---|
| 888 | }
|
---|
| 889 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 890 | if (r) return r;
|
---|
| 891 | if (CKN_ON) {
|
---|
| 892 | r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
|
---|
| 893 | if (r) return r;
|
---|
| 894 | r = add_state_check_num(reg, ckn);
|
---|
| 895 | if (r) return r;
|
---|
| 896 | r = add_rel_addr(reg,
|
---|
| 897 | -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
|
---|
| 898 | }
|
---|
| 899 | else
|
---|
| 900 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
| 901 | }
|
---|
| 902 | }
|
---|
| 903 | else if (qn->upper == 0) {
|
---|
| 904 | if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
| 905 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 906 | if (r) return r;
|
---|
| 907 | r = compile_tree(qn->target, reg);
|
---|
| 908 | }
|
---|
| 909 | else
|
---|
| 910 | r = 0;
|
---|
| 911 | }
|
---|
| 912 | else if (qn->upper == 1 && qn->greedy) {
|
---|
| 913 | if (qn->lower == 0) {
|
---|
| 914 | if (CKN_ON) {
|
---|
| 915 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
| 916 | if (r) return r;
|
---|
| 917 | r = add_state_check_num(reg, ckn);
|
---|
| 918 | if (r) return r;
|
---|
| 919 | r = add_rel_addr(reg, tlen);
|
---|
| 920 | }
|
---|
| 921 | else {
|
---|
| 922 | r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
|
---|
| 923 | }
|
---|
| 924 | if (r) return r;
|
---|
| 925 | }
|
---|
| 926 |
|
---|
| 927 | r = compile_tree(qn->target, reg);
|
---|
| 928 | }
|
---|
| 929 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 930 | if (CKN_ON) {
|
---|
| 931 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
| 932 | if (r) return r;
|
---|
| 933 | r = add_state_check_num(reg, ckn);
|
---|
| 934 | if (r) return r;
|
---|
| 935 | r = add_rel_addr(reg, SIZE_OP_JUMP);
|
---|
| 936 | }
|
---|
| 937 | else {
|
---|
| 938 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
| 939 | }
|
---|
| 940 |
|
---|
| 941 | if (r) return r;
|
---|
| 942 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 943 | if (r) return r;
|
---|
| 944 | r = compile_tree(qn->target, reg);
|
---|
| 945 | }
|
---|
| 946 | else {
|
---|
| 947 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
| 948 | if (CKN_ON) {
|
---|
| 949 | if (r) return r;
|
---|
| 950 | r = add_opcode(reg, OP_STATE_CHECK);
|
---|
| 951 | if (r) return r;
|
---|
| 952 | r = add_state_check_num(reg, ckn);
|
---|
| 953 | }
|
---|
| 954 | }
|
---|
| 955 | return r;
|
---|
| 956 | }
|
---|
| 957 |
|
---|
| 958 | #else /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
| 959 |
|
---|
| 960 | static int
|
---|
| 961 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 962 | {
|
---|
| 963 | int len, mod_tlen;
|
---|
| 964 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 965 | int empty_info = qn->target_empty_info;
|
---|
| 966 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 967 |
|
---|
| 968 | if (tlen < 0) return tlen;
|
---|
| 969 |
|
---|
| 970 | /* anychar repeat */
|
---|
| 971 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
| 972 | if (qn->greedy && infinite) {
|
---|
| 973 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
[331] | 974 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
|
---|
[279] | 975 | else
|
---|
[331] | 976 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
|
---|
[279] | 977 | }
|
---|
| 978 | }
|
---|
| 979 |
|
---|
| 980 | if (empty_info != 0)
|
---|
| 981 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 982 | else
|
---|
| 983 | mod_tlen = tlen;
|
---|
| 984 |
|
---|
| 985 | if (infinite &&
|
---|
| 986 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 987 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
| 988 | len = SIZE_OP_JUMP;
|
---|
| 989 | }
|
---|
| 990 | else {
|
---|
| 991 | len = tlen * qn->lower;
|
---|
| 992 | }
|
---|
| 993 |
|
---|
| 994 | if (qn->greedy) {
|
---|
[331] | 995 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
[279] | 996 | if (IS_NOT_NULL(qn->head_exact))
|
---|
| 997 | len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
|
---|
[331] | 998 | else
|
---|
| 999 | #endif
|
---|
| 1000 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
[279] | 1001 | len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
|
---|
| 1002 | else
|
---|
| 1003 | len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
|
---|
| 1004 | }
|
---|
| 1005 | else
|
---|
| 1006 | len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
|
---|
| 1007 | }
|
---|
| 1008 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
| 1009 | len = SIZE_OP_JUMP + tlen;
|
---|
| 1010 | }
|
---|
| 1011 | else if (!infinite && qn->greedy &&
|
---|
| 1012 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
| 1013 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 1014 | len = tlen * qn->lower;
|
---|
| 1015 | len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
|
---|
| 1016 | }
|
---|
| 1017 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 1018 | len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
|
---|
| 1019 | }
|
---|
| 1020 | else {
|
---|
| 1021 | len = SIZE_OP_REPEAT_INC
|
---|
| 1022 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
| 1023 | }
|
---|
| 1024 |
|
---|
| 1025 | return len;
|
---|
| 1026 | }
|
---|
| 1027 |
|
---|
| 1028 | static int
|
---|
| 1029 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 1030 | {
|
---|
| 1031 | int i, r, mod_tlen;
|
---|
| 1032 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 1033 | int empty_info = qn->target_empty_info;
|
---|
| 1034 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 1035 |
|
---|
| 1036 | if (tlen < 0) return tlen;
|
---|
| 1037 |
|
---|
| 1038 | if (is_anychar_star_quantifier(qn)) {
|
---|
| 1039 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1040 | if (r) return r;
|
---|
| 1041 | if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
| 1042 | if (IS_MULTILINE(reg->options))
|
---|
| 1043 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
| 1044 | else
|
---|
| 1045 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
| 1046 | if (r) return r;
|
---|
| 1047 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
| 1048 | }
|
---|
| 1049 | else {
|
---|
| 1050 | if (IS_MULTILINE(reg->options))
|
---|
| 1051 | return add_opcode(reg, OP_ANYCHAR_ML_STAR);
|
---|
| 1052 | else
|
---|
| 1053 | return add_opcode(reg, OP_ANYCHAR_STAR);
|
---|
| 1054 | }
|
---|
| 1055 | }
|
---|
| 1056 |
|
---|
| 1057 | if (empty_info != 0)
|
---|
| 1058 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 1059 | else
|
---|
| 1060 | mod_tlen = tlen;
|
---|
| 1061 |
|
---|
| 1062 | if (infinite &&
|
---|
| 1063 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 1064 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
| 1065 | if (qn->greedy) {
|
---|
[331] | 1066 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
[279] | 1067 | if (IS_NOT_NULL(qn->head_exact))
|
---|
| 1068 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
|
---|
[331] | 1069 | else
|
---|
| 1070 | #endif
|
---|
| 1071 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
[279] | 1072 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
|
---|
| 1073 | else
|
---|
| 1074 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
|
---|
| 1075 | }
|
---|
| 1076 | else {
|
---|
| 1077 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
|
---|
| 1078 | }
|
---|
| 1079 | if (r) return r;
|
---|
| 1080 | }
|
---|
| 1081 | else {
|
---|
| 1082 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1083 | if (r) return r;
|
---|
| 1084 | }
|
---|
| 1085 |
|
---|
| 1086 | if (qn->greedy) {
|
---|
[331] | 1087 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
[279] | 1088 | if (IS_NOT_NULL(qn->head_exact)) {
|
---|
| 1089 | r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
|
---|
| 1090 | mod_tlen + SIZE_OP_JUMP);
|
---|
| 1091 | if (r) return r;
|
---|
| 1092 | add_bytes(reg, NSTR(qn->head_exact)->s, 1);
|
---|
| 1093 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1094 | if (r) return r;
|
---|
| 1095 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1096 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
|
---|
| 1097 | }
|
---|
[331] | 1098 | else
|
---|
| 1099 | #endif
|
---|
| 1100 | if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
[279] | 1101 | r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
|
---|
| 1102 | mod_tlen + SIZE_OP_JUMP);
|
---|
| 1103 | if (r) return r;
|
---|
| 1104 | add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
| 1105 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1106 | if (r) return r;
|
---|
| 1107 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1108 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
|
---|
| 1109 | }
|
---|
| 1110 | else {
|
---|
| 1111 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
| 1112 | if (r) return r;
|
---|
| 1113 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1114 | if (r) return r;
|
---|
| 1115 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1116 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
|
---|
| 1117 | }
|
---|
| 1118 | }
|
---|
| 1119 | else {
|
---|
| 1120 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
| 1121 | if (r) return r;
|
---|
| 1122 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1123 | if (r) return r;
|
---|
| 1124 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
| 1125 | }
|
---|
| 1126 | }
|
---|
| 1127 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
| 1128 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 1129 | if (r) return r;
|
---|
| 1130 | r = compile_tree(qn->target, reg);
|
---|
| 1131 | }
|
---|
| 1132 | else if (!infinite && qn->greedy &&
|
---|
| 1133 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
| 1134 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 1135 | int n = qn->upper - qn->lower;
|
---|
| 1136 |
|
---|
| 1137 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1138 | if (r) return r;
|
---|
| 1139 |
|
---|
| 1140 | for (i = 0; i < n; i++) {
|
---|
| 1141 | r = add_opcode_rel_addr(reg, OP_PUSH,
|
---|
| 1142 | (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
|
---|
| 1143 | if (r) return r;
|
---|
| 1144 | r = compile_tree(qn->target, reg);
|
---|
| 1145 | if (r) return r;
|
---|
| 1146 | }
|
---|
| 1147 | }
|
---|
| 1148 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 1149 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
| 1150 | if (r) return r;
|
---|
| 1151 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 1152 | if (r) return r;
|
---|
| 1153 | r = compile_tree(qn->target, reg);
|
---|
| 1154 | }
|
---|
| 1155 | else {
|
---|
| 1156 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
| 1157 | }
|
---|
| 1158 | return r;
|
---|
| 1159 | }
|
---|
| 1160 | #endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
| 1161 |
|
---|
| 1162 | static int
|
---|
| 1163 | compile_length_option_node(EncloseNode* node, regex_t* reg)
|
---|
| 1164 | {
|
---|
| 1165 | int tlen;
|
---|
| 1166 | OnigOptionType prev = reg->options;
|
---|
| 1167 |
|
---|
| 1168 | reg->options = node->option;
|
---|
| 1169 | tlen = compile_length_tree(node->target, reg);
|
---|
| 1170 | reg->options = prev;
|
---|
| 1171 |
|
---|
| 1172 | if (tlen < 0) return tlen;
|
---|
| 1173 |
|
---|
| 1174 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
| 1175 | return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
|
---|
| 1176 | + tlen + SIZE_OP_SET_OPTION;
|
---|
| 1177 | }
|
---|
| 1178 | else
|
---|
| 1179 | return tlen;
|
---|
| 1180 | }
|
---|
| 1181 |
|
---|
| 1182 | static int
|
---|
| 1183 | compile_option_node(EncloseNode* node, regex_t* reg)
|
---|
| 1184 | {
|
---|
| 1185 | int r;
|
---|
| 1186 | OnigOptionType prev = reg->options;
|
---|
| 1187 |
|
---|
| 1188 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
| 1189 | r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
|
---|
| 1190 | if (r) return r;
|
---|
| 1191 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
| 1192 | if (r) return r;
|
---|
| 1193 | r = add_opcode(reg, OP_FAIL);
|
---|
| 1194 | if (r) return r;
|
---|
| 1195 | }
|
---|
| 1196 |
|
---|
| 1197 | reg->options = node->option;
|
---|
| 1198 | r = compile_tree(node->target, reg);
|
---|
| 1199 | reg->options = prev;
|
---|
| 1200 |
|
---|
| 1201 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
| 1202 | if (r) return r;
|
---|
| 1203 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
| 1204 | }
|
---|
| 1205 | return r;
|
---|
| 1206 | }
|
---|
| 1207 |
|
---|
| 1208 | static int
|
---|
| 1209 | compile_length_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
| 1210 | {
|
---|
| 1211 | int len;
|
---|
| 1212 | int tlen;
|
---|
| 1213 |
|
---|
| 1214 | if (node->type == ENCLOSE_OPTION)
|
---|
| 1215 | return compile_length_option_node(node, reg);
|
---|
| 1216 |
|
---|
| 1217 | if (node->target) {
|
---|
| 1218 | tlen = compile_length_tree(node->target, reg);
|
---|
| 1219 | if (tlen < 0) return tlen;
|
---|
| 1220 | }
|
---|
| 1221 | else
|
---|
| 1222 | tlen = 0;
|
---|
| 1223 |
|
---|
| 1224 | switch (node->type) {
|
---|
| 1225 | case ENCLOSE_MEMORY:
|
---|
| 1226 | #ifdef USE_SUBEXP_CALL
|
---|
| 1227 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
| 1228 | len = SIZE_OP_MEMORY_START_PUSH + tlen
|
---|
| 1229 | + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
|
---|
| 1230 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1231 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1232 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
| 1233 | else
|
---|
| 1234 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1235 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
| 1236 | }
|
---|
[331] | 1237 | else if (IS_ENCLOSE_RECURSION(node)) {
|
---|
| 1238 | len = SIZE_OP_MEMORY_START_PUSH;
|
---|
| 1239 | len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
|
---|
| 1240 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_REC);
|
---|
| 1241 | }
|
---|
[279] | 1242 | else
|
---|
| 1243 | #endif
|
---|
| 1244 | {
|
---|
| 1245 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
| 1246 | len = SIZE_OP_MEMORY_START_PUSH;
|
---|
| 1247 | else
|
---|
| 1248 | len = SIZE_OP_MEMORY_START;
|
---|
| 1249 |
|
---|
| 1250 | len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
|
---|
| 1251 | ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
|
---|
| 1252 | }
|
---|
| 1253 | break;
|
---|
| 1254 |
|
---|
| 1255 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 1256 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
| 1257 | QtfrNode* qn = NQTFR(node->target);
|
---|
| 1258 | tlen = compile_length_tree(qn->target, reg);
|
---|
| 1259 | if (tlen < 0) return tlen;
|
---|
| 1260 |
|
---|
| 1261 | len = tlen * qn->lower
|
---|
| 1262 | + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
|
---|
| 1263 | }
|
---|
| 1264 | else {
|
---|
| 1265 | len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
|
---|
| 1266 | }
|
---|
| 1267 | break;
|
---|
| 1268 |
|
---|
| 1269 | case ENCLOSE_CONDITION:
|
---|
| 1270 | len = SIZE_OP_CONDITION;
|
---|
| 1271 | if (NTYPE(node->target) == NT_ALT) {
|
---|
| 1272 | Node* x = node->target;
|
---|
| 1273 |
|
---|
| 1274 | tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
| 1275 | if (tlen < 0) return tlen;
|
---|
| 1276 | len += tlen + SIZE_OP_JUMP;
|
---|
| 1277 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
| 1278 | x = NCDR(x);
|
---|
| 1279 | tlen = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
| 1280 | if (tlen < 0) return tlen;
|
---|
| 1281 | len += tlen;
|
---|
| 1282 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
| 1283 | }
|
---|
| 1284 | else {
|
---|
| 1285 | return ONIGERR_PARSER_BUG;
|
---|
| 1286 | }
|
---|
| 1287 | break;
|
---|
| 1288 |
|
---|
[331] | 1289 | case ENCLOSE_ABSENT:
|
---|
| 1290 | len = SIZE_OP_PUSH_ABSENT_POS + SIZE_OP_ABSENT + tlen + SIZE_OP_ABSENT_END;
|
---|
| 1291 | break;
|
---|
| 1292 |
|
---|
[279] | 1293 | default:
|
---|
| 1294 | return ONIGERR_TYPE_BUG;
|
---|
| 1295 | break;
|
---|
| 1296 | }
|
---|
| 1297 |
|
---|
| 1298 | return len;
|
---|
| 1299 | }
|
---|
| 1300 |
|
---|
| 1301 | static int get_char_length_tree(Node* node, regex_t* reg, int* len);
|
---|
| 1302 |
|
---|
| 1303 | static int
|
---|
| 1304 | compile_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
| 1305 | {
|
---|
| 1306 | int r, len;
|
---|
| 1307 |
|
---|
| 1308 | if (node->type == ENCLOSE_OPTION)
|
---|
| 1309 | return compile_option_node(node, reg);
|
---|
| 1310 |
|
---|
| 1311 | switch (node->type) {
|
---|
| 1312 | case ENCLOSE_MEMORY:
|
---|
| 1313 | #ifdef USE_SUBEXP_CALL
|
---|
| 1314 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
| 1315 | r = add_opcode(reg, OP_CALL);
|
---|
| 1316 | if (r) return r;
|
---|
| 1317 | node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
|
---|
| 1318 | node->state |= NST_ADDR_FIXED;
|
---|
| 1319 | r = add_abs_addr(reg, (int )node->call_addr);
|
---|
| 1320 | if (r) return r;
|
---|
| 1321 | len = compile_length_tree(node->target, reg);
|
---|
| 1322 | len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
|
---|
| 1323 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1324 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1325 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
| 1326 | else
|
---|
| 1327 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1328 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
| 1329 |
|
---|
| 1330 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
| 1331 | if (r) return r;
|
---|
| 1332 | }
|
---|
| 1333 | #endif
|
---|
| 1334 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
| 1335 | r = add_opcode(reg, OP_MEMORY_START_PUSH);
|
---|
| 1336 | else
|
---|
| 1337 | r = add_opcode(reg, OP_MEMORY_START);
|
---|
| 1338 | if (r) return r;
|
---|
| 1339 | r = add_mem_num(reg, node->regnum);
|
---|
| 1340 | if (r) return r;
|
---|
| 1341 | r = compile_tree(node->target, reg);
|
---|
| 1342 | if (r) return r;
|
---|
| 1343 | #ifdef USE_SUBEXP_CALL
|
---|
| 1344 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
| 1345 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1346 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
| 1347 | ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
|
---|
| 1348 | else
|
---|
| 1349 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
| 1350 | ? OP_MEMORY_END_REC : OP_MEMORY_END));
|
---|
| 1351 |
|
---|
| 1352 | if (r) return r;
|
---|
| 1353 | r = add_mem_num(reg, node->regnum);
|
---|
| 1354 | if (r) return r;
|
---|
| 1355 | r = add_opcode(reg, OP_RETURN);
|
---|
| 1356 | }
|
---|
[331] | 1357 | else if (IS_ENCLOSE_RECURSION(node)) {
|
---|
| 1358 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1359 | r = add_opcode(reg, OP_MEMORY_END_PUSH_REC);
|
---|
| 1360 | else
|
---|
| 1361 | r = add_opcode(reg, OP_MEMORY_END_REC);
|
---|
| 1362 | if (r) return r;
|
---|
| 1363 | r = add_mem_num(reg, node->regnum);
|
---|
| 1364 | }
|
---|
[279] | 1365 | else
|
---|
| 1366 | #endif
|
---|
| 1367 | {
|
---|
| 1368 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1369 | r = add_opcode(reg, OP_MEMORY_END_PUSH);
|
---|
| 1370 | else
|
---|
| 1371 | r = add_opcode(reg, OP_MEMORY_END);
|
---|
| 1372 | if (r) return r;
|
---|
| 1373 | r = add_mem_num(reg, node->regnum);
|
---|
| 1374 | }
|
---|
| 1375 | break;
|
---|
| 1376 |
|
---|
| 1377 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 1378 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
| 1379 | QtfrNode* qn = NQTFR(node->target);
|
---|
| 1380 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1381 | if (r) return r;
|
---|
| 1382 |
|
---|
| 1383 | len = compile_length_tree(qn->target, reg);
|
---|
| 1384 | if (len < 0) return len;
|
---|
| 1385 |
|
---|
| 1386 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
|
---|
| 1387 | if (r) return r;
|
---|
| 1388 | r = compile_tree(qn->target, reg);
|
---|
| 1389 | if (r) return r;
|
---|
| 1390 | r = add_opcode(reg, OP_POP);
|
---|
| 1391 | if (r) return r;
|
---|
| 1392 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1393 | -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
|
---|
| 1394 | }
|
---|
| 1395 | else {
|
---|
| 1396 | r = add_opcode(reg, OP_PUSH_STOP_BT);
|
---|
| 1397 | if (r) return r;
|
---|
| 1398 | r = compile_tree(node->target, reg);
|
---|
| 1399 | if (r) return r;
|
---|
| 1400 | r = add_opcode(reg, OP_POP_STOP_BT);
|
---|
| 1401 | }
|
---|
| 1402 | break;
|
---|
| 1403 |
|
---|
| 1404 | case ENCLOSE_CONDITION:
|
---|
| 1405 | r = add_opcode(reg, OP_CONDITION);
|
---|
| 1406 | if (r) return r;
|
---|
| 1407 | r = add_mem_num(reg, node->regnum);
|
---|
| 1408 | if (r) return r;
|
---|
| 1409 |
|
---|
| 1410 | if (NTYPE(node->target) == NT_ALT) {
|
---|
| 1411 | Node* x = node->target;
|
---|
| 1412 | int len2;
|
---|
| 1413 |
|
---|
| 1414 | len = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
| 1415 | if (len < 0) return len;
|
---|
| 1416 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
| 1417 | x = NCDR(x);
|
---|
| 1418 | len2 = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
| 1419 | if (len2 < 0) return len2;
|
---|
| 1420 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
| 1421 |
|
---|
| 1422 | x = node->target;
|
---|
| 1423 | r = add_rel_addr(reg, len + SIZE_OP_JUMP);
|
---|
| 1424 | if (r) return r;
|
---|
| 1425 | r = compile_tree(NCAR(x), reg); /* yes-node */
|
---|
| 1426 | if (r) return r;
|
---|
| 1427 | r = add_opcode_rel_addr(reg, OP_JUMP, len2);
|
---|
| 1428 | if (r) return r;
|
---|
| 1429 | x = NCDR(x);
|
---|
| 1430 | r = compile_tree(NCAR(x), reg); /* no-node */
|
---|
| 1431 | }
|
---|
| 1432 | else {
|
---|
| 1433 | return ONIGERR_PARSER_BUG;
|
---|
| 1434 | }
|
---|
| 1435 | break;
|
---|
| 1436 |
|
---|
[331] | 1437 | case ENCLOSE_ABSENT:
|
---|
| 1438 | len = compile_length_tree(node->target, reg);
|
---|
| 1439 | if (len < 0) return len;
|
---|
| 1440 |
|
---|
| 1441 | r = add_opcode(reg, OP_PUSH_ABSENT_POS);
|
---|
| 1442 | if (r) return r;
|
---|
| 1443 | r = add_opcode_rel_addr(reg, OP_ABSENT, len + SIZE_OP_ABSENT_END);
|
---|
| 1444 | if (r) return r;
|
---|
| 1445 | r = compile_tree(node->target, reg);
|
---|
| 1446 | if (r) return r;
|
---|
| 1447 | r = add_opcode(reg, OP_ABSENT_END);
|
---|
| 1448 | break;
|
---|
| 1449 |
|
---|
[279] | 1450 | default:
|
---|
| 1451 | return ONIGERR_TYPE_BUG;
|
---|
| 1452 | break;
|
---|
| 1453 | }
|
---|
| 1454 |
|
---|
| 1455 | return r;
|
---|
| 1456 | }
|
---|
| 1457 |
|
---|
| 1458 | static int
|
---|
| 1459 | compile_length_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
| 1460 | {
|
---|
| 1461 | int len;
|
---|
| 1462 | int tlen = 0;
|
---|
| 1463 |
|
---|
| 1464 | if (node->target) {
|
---|
| 1465 | tlen = compile_length_tree(node->target, reg);
|
---|
| 1466 | if (tlen < 0) return tlen;
|
---|
| 1467 | }
|
---|
| 1468 |
|
---|
| 1469 | switch (node->type) {
|
---|
| 1470 | case ANCHOR_PREC_READ:
|
---|
| 1471 | len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
|
---|
| 1472 | break;
|
---|
| 1473 | case ANCHOR_PREC_READ_NOT:
|
---|
| 1474 | len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
|
---|
| 1475 | break;
|
---|
| 1476 | case ANCHOR_LOOK_BEHIND:
|
---|
| 1477 | len = SIZE_OP_LOOK_BEHIND + tlen;
|
---|
| 1478 | break;
|
---|
| 1479 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 1480 | len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
|
---|
| 1481 | break;
|
---|
| 1482 |
|
---|
| 1483 | default:
|
---|
| 1484 | len = SIZE_OPCODE;
|
---|
| 1485 | break;
|
---|
| 1486 | }
|
---|
| 1487 |
|
---|
| 1488 | return len;
|
---|
| 1489 | }
|
---|
| 1490 |
|
---|
| 1491 | static int
|
---|
| 1492 | compile_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
| 1493 | {
|
---|
| 1494 | int r, len;
|
---|
| 1495 |
|
---|
| 1496 | switch (node->type) {
|
---|
| 1497 | case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
|
---|
| 1498 | case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
|
---|
| 1499 | case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
|
---|
| 1500 | case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
|
---|
| 1501 | case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
|
---|
| 1502 | case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
|
---|
| 1503 |
|
---|
| 1504 | case ANCHOR_WORD_BOUND:
|
---|
| 1505 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
|
---|
| 1506 | else r = add_opcode(reg, OP_WORD_BOUND);
|
---|
| 1507 | break;
|
---|
| 1508 | case ANCHOR_NOT_WORD_BOUND:
|
---|
| 1509 | if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
|
---|
| 1510 | else r = add_opcode(reg, OP_NOT_WORD_BOUND);
|
---|
| 1511 | break;
|
---|
| 1512 | #ifdef USE_WORD_BEGIN_END
|
---|
| 1513 | case ANCHOR_WORD_BEGIN:
|
---|
| 1514 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
|
---|
| 1515 | else r = add_opcode(reg, OP_WORD_BEGIN);
|
---|
| 1516 | break;
|
---|
| 1517 | case ANCHOR_WORD_END:
|
---|
| 1518 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
|
---|
| 1519 | else r = add_opcode(reg, OP_WORD_END);
|
---|
| 1520 | break;
|
---|
| 1521 | #endif
|
---|
| 1522 | case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
|
---|
| 1523 |
|
---|
| 1524 | case ANCHOR_PREC_READ:
|
---|
| 1525 | r = add_opcode(reg, OP_PUSH_POS);
|
---|
| 1526 | if (r) return r;
|
---|
| 1527 | r = compile_tree(node->target, reg);
|
---|
| 1528 | if (r) return r;
|
---|
| 1529 | r = add_opcode(reg, OP_POP_POS);
|
---|
| 1530 | break;
|
---|
| 1531 |
|
---|
| 1532 | case ANCHOR_PREC_READ_NOT:
|
---|
| 1533 | len = compile_length_tree(node->target, reg);
|
---|
| 1534 | if (len < 0) return len;
|
---|
| 1535 | r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
|
---|
| 1536 | if (r) return r;
|
---|
| 1537 | r = compile_tree(node->target, reg);
|
---|
| 1538 | if (r) return r;
|
---|
| 1539 | r = add_opcode(reg, OP_FAIL_POS);
|
---|
| 1540 | break;
|
---|
| 1541 |
|
---|
| 1542 | case ANCHOR_LOOK_BEHIND:
|
---|
| 1543 | {
|
---|
| 1544 | int n;
|
---|
| 1545 | r = add_opcode(reg, OP_LOOK_BEHIND);
|
---|
| 1546 | if (r) return r;
|
---|
| 1547 | if (node->char_len < 0) {
|
---|
| 1548 | r = get_char_length_tree(node->target, reg, &n);
|
---|
| 1549 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 1550 | }
|
---|
| 1551 | else
|
---|
| 1552 | n = node->char_len;
|
---|
| 1553 | r = add_length(reg, n);
|
---|
| 1554 | if (r) return r;
|
---|
| 1555 | r = compile_tree(node->target, reg);
|
---|
| 1556 | }
|
---|
| 1557 | break;
|
---|
| 1558 |
|
---|
| 1559 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 1560 | {
|
---|
| 1561 | int n;
|
---|
| 1562 | len = compile_length_tree(node->target, reg);
|
---|
| 1563 | r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
|
---|
| 1564 | len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
|
---|
| 1565 | if (r) return r;
|
---|
| 1566 | if (node->char_len < 0) {
|
---|
| 1567 | r = get_char_length_tree(node->target, reg, &n);
|
---|
| 1568 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 1569 | }
|
---|
| 1570 | else
|
---|
| 1571 | n = node->char_len;
|
---|
| 1572 | r = add_length(reg, n);
|
---|
| 1573 | if (r) return r;
|
---|
| 1574 | r = compile_tree(node->target, reg);
|
---|
| 1575 | if (r) return r;
|
---|
| 1576 | r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
|
---|
| 1577 | }
|
---|
| 1578 | break;
|
---|
| 1579 |
|
---|
| 1580 | default:
|
---|
| 1581 | return ONIGERR_TYPE_BUG;
|
---|
| 1582 | break;
|
---|
| 1583 | }
|
---|
| 1584 |
|
---|
| 1585 | return r;
|
---|
| 1586 | }
|
---|
| 1587 |
|
---|
| 1588 | static int
|
---|
| 1589 | compile_length_tree(Node* node, regex_t* reg)
|
---|
| 1590 | {
|
---|
| 1591 | int len, type, r;
|
---|
| 1592 |
|
---|
| 1593 | type = NTYPE(node);
|
---|
| 1594 | switch (type) {
|
---|
| 1595 | case NT_LIST:
|
---|
| 1596 | len = 0;
|
---|
| 1597 | do {
|
---|
| 1598 | r = compile_length_tree(NCAR(node), reg);
|
---|
| 1599 | if (r < 0) return r;
|
---|
| 1600 | len += r;
|
---|
| 1601 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1602 | r = len;
|
---|
| 1603 | break;
|
---|
| 1604 |
|
---|
| 1605 | case NT_ALT:
|
---|
| 1606 | {
|
---|
| 1607 | int n = 0;
|
---|
| 1608 | len = 0;
|
---|
| 1609 | do {
|
---|
[331] | 1610 | r = compile_length_tree(NCAR(node), reg);
|
---|
| 1611 | if (r < 0) return r;
|
---|
| 1612 | len += r;
|
---|
| 1613 | n++;
|
---|
[279] | 1614 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1615 | r = len;
|
---|
| 1616 | r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
|
---|
| 1617 | }
|
---|
| 1618 | break;
|
---|
| 1619 |
|
---|
| 1620 | case NT_STR:
|
---|
| 1621 | if (NSTRING_IS_RAW(node))
|
---|
| 1622 | r = compile_length_string_raw_node(NSTR(node), reg);
|
---|
| 1623 | else
|
---|
| 1624 | r = compile_length_string_node(node, reg);
|
---|
| 1625 | break;
|
---|
| 1626 |
|
---|
| 1627 | case NT_CCLASS:
|
---|
| 1628 | r = compile_length_cclass_node(NCCLASS(node), reg);
|
---|
| 1629 | break;
|
---|
| 1630 |
|
---|
| 1631 | case NT_CTYPE:
|
---|
| 1632 | case NT_CANY:
|
---|
| 1633 | r = SIZE_OPCODE;
|
---|
| 1634 | break;
|
---|
| 1635 |
|
---|
| 1636 | case NT_BREF:
|
---|
| 1637 | {
|
---|
| 1638 | BRefNode* br = NBREF(node);
|
---|
| 1639 |
|
---|
| 1640 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 1641 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
[331] | 1642 | r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
|
---|
[279] | 1643 | SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
| 1644 | }
|
---|
| 1645 | else
|
---|
| 1646 | #endif
|
---|
| 1647 | if (br->back_num == 1) {
|
---|
| 1648 | r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
|
---|
| 1649 | ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
|
---|
| 1650 | }
|
---|
| 1651 | else {
|
---|
| 1652 | r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
| 1653 | }
|
---|
| 1654 | }
|
---|
| 1655 | break;
|
---|
| 1656 |
|
---|
| 1657 | #ifdef USE_SUBEXP_CALL
|
---|
| 1658 | case NT_CALL:
|
---|
| 1659 | r = SIZE_OP_CALL;
|
---|
| 1660 | break;
|
---|
| 1661 | #endif
|
---|
| 1662 |
|
---|
| 1663 | case NT_QTFR:
|
---|
| 1664 | r = compile_length_quantifier_node(NQTFR(node), reg);
|
---|
| 1665 | break;
|
---|
| 1666 |
|
---|
| 1667 | case NT_ENCLOSE:
|
---|
| 1668 | r = compile_length_enclose_node(NENCLOSE(node), reg);
|
---|
| 1669 | break;
|
---|
| 1670 |
|
---|
| 1671 | case NT_ANCHOR:
|
---|
| 1672 | r = compile_length_anchor_node(NANCHOR(node), reg);
|
---|
| 1673 | break;
|
---|
| 1674 |
|
---|
| 1675 | default:
|
---|
| 1676 | return ONIGERR_TYPE_BUG;
|
---|
| 1677 | break;
|
---|
| 1678 | }
|
---|
| 1679 |
|
---|
| 1680 | return r;
|
---|
| 1681 | }
|
---|
| 1682 |
|
---|
| 1683 | static int
|
---|
| 1684 | compile_tree(Node* node, regex_t* reg)
|
---|
| 1685 | {
|
---|
| 1686 | int n, type, len, pos, r = 0;
|
---|
| 1687 |
|
---|
| 1688 | type = NTYPE(node);
|
---|
| 1689 | switch (type) {
|
---|
| 1690 | case NT_LIST:
|
---|
| 1691 | do {
|
---|
| 1692 | r = compile_tree(NCAR(node), reg);
|
---|
| 1693 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1694 | break;
|
---|
| 1695 |
|
---|
| 1696 | case NT_ALT:
|
---|
| 1697 | {
|
---|
| 1698 | Node* x = node;
|
---|
| 1699 | len = 0;
|
---|
| 1700 | do {
|
---|
| 1701 | len += compile_length_tree(NCAR(x), reg);
|
---|
| 1702 | if (NCDR(x) != NULL) {
|
---|
| 1703 | len += SIZE_OP_PUSH + SIZE_OP_JUMP;
|
---|
| 1704 | }
|
---|
| 1705 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
| 1706 | pos = reg->used + len; /* goal position */
|
---|
| 1707 |
|
---|
| 1708 | do {
|
---|
| 1709 | len = compile_length_tree(NCAR(node), reg);
|
---|
| 1710 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
| 1711 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
|
---|
| 1712 | if (r) break;
|
---|
| 1713 | }
|
---|
| 1714 | r = compile_tree(NCAR(node), reg);
|
---|
| 1715 | if (r) break;
|
---|
| 1716 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
| 1717 | len = pos - (reg->used + SIZE_OP_JUMP);
|
---|
| 1718 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
| 1719 | if (r) break;
|
---|
| 1720 | }
|
---|
| 1721 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1722 | }
|
---|
| 1723 | break;
|
---|
| 1724 |
|
---|
| 1725 | case NT_STR:
|
---|
| 1726 | if (NSTRING_IS_RAW(node))
|
---|
| 1727 | r = compile_string_raw_node(NSTR(node), reg);
|
---|
| 1728 | else
|
---|
| 1729 | r = compile_string_node(node, reg);
|
---|
| 1730 | break;
|
---|
| 1731 |
|
---|
| 1732 | case NT_CCLASS:
|
---|
| 1733 | r = compile_cclass_node(NCCLASS(node), reg);
|
---|
| 1734 | break;
|
---|
| 1735 |
|
---|
| 1736 | case NT_CTYPE:
|
---|
| 1737 | {
|
---|
| 1738 | int op;
|
---|
| 1739 |
|
---|
| 1740 | switch (NCTYPE(node)->ctype) {
|
---|
| 1741 | case ONIGENC_CTYPE_WORD:
|
---|
| 1742 | if (NCTYPE(node)->ascii_range != 0) {
|
---|
| 1743 | if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
|
---|
| 1744 | else op = OP_ASCII_WORD;
|
---|
| 1745 | }
|
---|
| 1746 | else {
|
---|
| 1747 | if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
|
---|
| 1748 | else op = OP_WORD;
|
---|
| 1749 | }
|
---|
| 1750 | break;
|
---|
| 1751 | default:
|
---|
| 1752 | return ONIGERR_TYPE_BUG;
|
---|
| 1753 | break;
|
---|
| 1754 | }
|
---|
| 1755 | r = add_opcode(reg, op);
|
---|
| 1756 | }
|
---|
| 1757 | break;
|
---|
| 1758 |
|
---|
| 1759 | case NT_CANY:
|
---|
| 1760 | if (IS_MULTILINE(reg->options))
|
---|
| 1761 | r = add_opcode(reg, OP_ANYCHAR_ML);
|
---|
| 1762 | else
|
---|
| 1763 | r = add_opcode(reg, OP_ANYCHAR);
|
---|
| 1764 | break;
|
---|
| 1765 |
|
---|
| 1766 | case NT_BREF:
|
---|
| 1767 | {
|
---|
| 1768 | BRefNode* br = NBREF(node);
|
---|
| 1769 |
|
---|
| 1770 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 1771 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
| 1772 | r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
|
---|
| 1773 | if (r) return r;
|
---|
| 1774 | r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
|
---|
| 1775 | if (r) return r;
|
---|
| 1776 | r = add_length(reg, br->nest_level);
|
---|
| 1777 | if (r) return r;
|
---|
| 1778 |
|
---|
| 1779 | goto add_bacref_mems;
|
---|
| 1780 | }
|
---|
| 1781 | else
|
---|
| 1782 | #endif
|
---|
| 1783 | if (br->back_num == 1) {
|
---|
| 1784 | n = br->back_static[0];
|
---|
| 1785 | if (IS_IGNORECASE(reg->options)) {
|
---|
| 1786 | r = add_opcode(reg, OP_BACKREFN_IC);
|
---|
| 1787 | if (r) return r;
|
---|
| 1788 | r = add_mem_num(reg, n);
|
---|
| 1789 | }
|
---|
| 1790 | else {
|
---|
| 1791 | switch (n) {
|
---|
| 1792 | case 1: r = add_opcode(reg, OP_BACKREF1); break;
|
---|
| 1793 | case 2: r = add_opcode(reg, OP_BACKREF2); break;
|
---|
| 1794 | default:
|
---|
| 1795 | r = add_opcode(reg, OP_BACKREFN);
|
---|
| 1796 | if (r) return r;
|
---|
| 1797 | r = add_mem_num(reg, n);
|
---|
| 1798 | break;
|
---|
| 1799 | }
|
---|
| 1800 | }
|
---|
| 1801 | }
|
---|
| 1802 | else {
|
---|
| 1803 | int i;
|
---|
| 1804 | int* p;
|
---|
| 1805 |
|
---|
[331] | 1806 | if (IS_IGNORECASE(reg->options)) {
|
---|
| 1807 | r = add_opcode(reg, OP_BACKREF_MULTI_IC);
|
---|
| 1808 | }
|
---|
| 1809 | else {
|
---|
| 1810 | r = add_opcode(reg, OP_BACKREF_MULTI);
|
---|
| 1811 | }
|
---|
[279] | 1812 | if (r) return r;
|
---|
| 1813 |
|
---|
| 1814 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 1815 | add_bacref_mems:
|
---|
| 1816 | #endif
|
---|
| 1817 | r = add_length(reg, br->back_num);
|
---|
| 1818 | if (r) return r;
|
---|
| 1819 | p = BACKREFS_P(br);
|
---|
| 1820 | for (i = br->back_num - 1; i >= 0; i--) {
|
---|
| 1821 | r = add_mem_num(reg, p[i]);
|
---|
| 1822 | if (r) return r;
|
---|
| 1823 | }
|
---|
| 1824 | }
|
---|
| 1825 | }
|
---|
| 1826 | break;
|
---|
| 1827 |
|
---|
| 1828 | #ifdef USE_SUBEXP_CALL
|
---|
| 1829 | case NT_CALL:
|
---|
| 1830 | r = compile_call(NCALL(node), reg);
|
---|
| 1831 | break;
|
---|
| 1832 | #endif
|
---|
| 1833 |
|
---|
| 1834 | case NT_QTFR:
|
---|
| 1835 | r = compile_quantifier_node(NQTFR(node), reg);
|
---|
| 1836 | break;
|
---|
| 1837 |
|
---|
| 1838 | case NT_ENCLOSE:
|
---|
| 1839 | r = compile_enclose_node(NENCLOSE(node), reg);
|
---|
| 1840 | break;
|
---|
| 1841 |
|
---|
| 1842 | case NT_ANCHOR:
|
---|
| 1843 | r = compile_anchor_node(NANCHOR(node), reg);
|
---|
| 1844 | break;
|
---|
| 1845 |
|
---|
| 1846 | default:
|
---|
| 1847 | #ifdef ONIG_DEBUG
|
---|
| 1848 | fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
|
---|
| 1849 | #endif
|
---|
| 1850 | break;
|
---|
| 1851 | }
|
---|
| 1852 |
|
---|
| 1853 | return r;
|
---|
| 1854 | }
|
---|
| 1855 |
|
---|
| 1856 | #ifdef USE_NAMED_GROUP
|
---|
| 1857 |
|
---|
| 1858 | static int
|
---|
| 1859 | noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
|
---|
| 1860 | {
|
---|
| 1861 | int r = 0;
|
---|
| 1862 | Node* node = *plink;
|
---|
| 1863 |
|
---|
| 1864 | switch (NTYPE(node)) {
|
---|
| 1865 | case NT_LIST:
|
---|
| 1866 | case NT_ALT:
|
---|
| 1867 | do {
|
---|
| 1868 | r = noname_disable_map(&(NCAR(node)), map, counter);
|
---|
| 1869 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1870 | break;
|
---|
| 1871 |
|
---|
| 1872 | case NT_QTFR:
|
---|
| 1873 | {
|
---|
| 1874 | Node** ptarget = &(NQTFR(node)->target);
|
---|
| 1875 | Node* old = *ptarget;
|
---|
| 1876 | r = noname_disable_map(ptarget, map, counter);
|
---|
| 1877 | if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
|
---|
| 1878 | onig_reduce_nested_quantifier(node, *ptarget);
|
---|
| 1879 | }
|
---|
| 1880 | }
|
---|
| 1881 | break;
|
---|
| 1882 |
|
---|
| 1883 | case NT_ENCLOSE:
|
---|
| 1884 | {
|
---|
| 1885 | EncloseNode* en = NENCLOSE(node);
|
---|
| 1886 | if (en->type == ENCLOSE_MEMORY) {
|
---|
| 1887 | if (IS_ENCLOSE_NAMED_GROUP(en)) {
|
---|
| 1888 | (*counter)++;
|
---|
| 1889 | map[en->regnum].new_val = *counter;
|
---|
| 1890 | en->regnum = *counter;
|
---|
| 1891 | }
|
---|
| 1892 | else if (en->regnum != 0) {
|
---|
| 1893 | *plink = en->target;
|
---|
| 1894 | en->target = NULL_NODE;
|
---|
| 1895 | onig_node_free(node);
|
---|
| 1896 | r = noname_disable_map(plink, map, counter);
|
---|
| 1897 | break;
|
---|
| 1898 | }
|
---|
| 1899 | }
|
---|
| 1900 | r = noname_disable_map(&(en->target), map, counter);
|
---|
| 1901 | }
|
---|
| 1902 | break;
|
---|
| 1903 |
|
---|
| 1904 | case NT_ANCHOR:
|
---|
[331] | 1905 | if (NANCHOR(node)->target)
|
---|
| 1906 | r = noname_disable_map(&(NANCHOR(node)->target), map, counter);
|
---|
[279] | 1907 | break;
|
---|
| 1908 |
|
---|
| 1909 | default:
|
---|
| 1910 | break;
|
---|
| 1911 | }
|
---|
| 1912 |
|
---|
| 1913 | return r;
|
---|
| 1914 | }
|
---|
| 1915 |
|
---|
| 1916 | static int
|
---|
| 1917 | renumber_node_backref(Node* node, GroupNumRemap* map)
|
---|
| 1918 | {
|
---|
| 1919 | int i, pos, n, old_num;
|
---|
| 1920 | int *backs;
|
---|
| 1921 | BRefNode* bn = NBREF(node);
|
---|
| 1922 |
|
---|
| 1923 | if (! IS_BACKREF_NAME_REF(bn))
|
---|
| 1924 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
| 1925 |
|
---|
| 1926 | old_num = bn->back_num;
|
---|
| 1927 | if (IS_NULL(bn->back_dynamic))
|
---|
| 1928 | backs = bn->back_static;
|
---|
| 1929 | else
|
---|
| 1930 | backs = bn->back_dynamic;
|
---|
| 1931 |
|
---|
| 1932 | for (i = 0, pos = 0; i < old_num; i++) {
|
---|
| 1933 | n = map[backs[i]].new_val;
|
---|
| 1934 | if (n > 0) {
|
---|
| 1935 | backs[pos] = n;
|
---|
| 1936 | pos++;
|
---|
| 1937 | }
|
---|
| 1938 | }
|
---|
| 1939 |
|
---|
| 1940 | bn->back_num = pos;
|
---|
| 1941 | return 0;
|
---|
| 1942 | }
|
---|
| 1943 |
|
---|
| 1944 | static int
|
---|
| 1945 | renumber_by_map(Node* node, GroupNumRemap* map)
|
---|
| 1946 | {
|
---|
| 1947 | int r = 0;
|
---|
| 1948 |
|
---|
| 1949 | switch (NTYPE(node)) {
|
---|
| 1950 | case NT_LIST:
|
---|
| 1951 | case NT_ALT:
|
---|
| 1952 | do {
|
---|
| 1953 | r = renumber_by_map(NCAR(node), map);
|
---|
| 1954 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1955 | break;
|
---|
| 1956 | case NT_QTFR:
|
---|
| 1957 | r = renumber_by_map(NQTFR(node)->target, map);
|
---|
| 1958 | break;
|
---|
| 1959 | case NT_ENCLOSE:
|
---|
| 1960 | {
|
---|
| 1961 | EncloseNode* en = NENCLOSE(node);
|
---|
| 1962 | if (en->type == ENCLOSE_CONDITION)
|
---|
[331] | 1963 | en->regnum = map[en->regnum].new_val;
|
---|
[279] | 1964 | r = renumber_by_map(en->target, map);
|
---|
| 1965 | }
|
---|
| 1966 | break;
|
---|
| 1967 |
|
---|
| 1968 | case NT_BREF:
|
---|
| 1969 | r = renumber_node_backref(node, map);
|
---|
| 1970 | break;
|
---|
| 1971 |
|
---|
| 1972 | case NT_ANCHOR:
|
---|
[331] | 1973 | if (NANCHOR(node)->target)
|
---|
| 1974 | r = renumber_by_map(NANCHOR(node)->target, map);
|
---|
[279] | 1975 | break;
|
---|
| 1976 |
|
---|
| 1977 | default:
|
---|
| 1978 | break;
|
---|
| 1979 | }
|
---|
| 1980 |
|
---|
| 1981 | return r;
|
---|
| 1982 | }
|
---|
| 1983 |
|
---|
| 1984 | static int
|
---|
| 1985 | numbered_ref_check(Node* node)
|
---|
| 1986 | {
|
---|
| 1987 | int r = 0;
|
---|
| 1988 |
|
---|
| 1989 | switch (NTYPE(node)) {
|
---|
| 1990 | case NT_LIST:
|
---|
| 1991 | case NT_ALT:
|
---|
| 1992 | do {
|
---|
| 1993 | r = numbered_ref_check(NCAR(node));
|
---|
| 1994 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1995 | break;
|
---|
| 1996 | case NT_QTFR:
|
---|
| 1997 | r = numbered_ref_check(NQTFR(node)->target);
|
---|
| 1998 | break;
|
---|
| 1999 | case NT_ENCLOSE:
|
---|
| 2000 | r = numbered_ref_check(NENCLOSE(node)->target);
|
---|
| 2001 | break;
|
---|
| 2002 |
|
---|
| 2003 | case NT_BREF:
|
---|
| 2004 | if (! IS_BACKREF_NAME_REF(NBREF(node)))
|
---|
| 2005 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
| 2006 | break;
|
---|
| 2007 |
|
---|
[331] | 2008 | case NT_ANCHOR:
|
---|
| 2009 | if (NANCHOR(node)->target)
|
---|
| 2010 | r = numbered_ref_check(NANCHOR(node)->target);
|
---|
| 2011 | break;
|
---|
| 2012 |
|
---|
[279] | 2013 | default:
|
---|
| 2014 | break;
|
---|
| 2015 | }
|
---|
| 2016 |
|
---|
| 2017 | return r;
|
---|
| 2018 | }
|
---|
| 2019 |
|
---|
| 2020 | static int
|
---|
[331] | 2021 | disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
|
---|
[279] | 2022 | {
|
---|
| 2023 | int r, i, pos, counter;
|
---|
| 2024 | BitStatusType loc;
|
---|
[331] | 2025 | GroupNumRemap* map;
|
---|
[279] | 2026 |
|
---|
[331] | 2027 | map = (GroupNumRemap* )xalloca(sizeof(GroupNumRemap) * (env->num_mem + 1));
|
---|
[279] | 2028 | CHECK_NULL_RETURN_MEMERR(map);
|
---|
| 2029 | for (i = 1; i <= env->num_mem; i++) {
|
---|
| 2030 | map[i].new_val = 0;
|
---|
| 2031 | }
|
---|
| 2032 | counter = 0;
|
---|
| 2033 | r = noname_disable_map(root, map, &counter);
|
---|
| 2034 | if (r != 0) return r;
|
---|
| 2035 |
|
---|
| 2036 | r = renumber_by_map(*root, map);
|
---|
| 2037 | if (r != 0) return r;
|
---|
| 2038 |
|
---|
| 2039 | for (i = 1, pos = 1; i <= env->num_mem; i++) {
|
---|
| 2040 | if (map[i].new_val > 0) {
|
---|
| 2041 | SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
|
---|
| 2042 | pos++;
|
---|
| 2043 | }
|
---|
| 2044 | }
|
---|
| 2045 |
|
---|
| 2046 | loc = env->capture_history;
|
---|
| 2047 | BIT_STATUS_CLEAR(env->capture_history);
|
---|
| 2048 | for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
|
---|
| 2049 | if (BIT_STATUS_AT(loc, i)) {
|
---|
| 2050 | BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
|
---|
| 2051 | }
|
---|
| 2052 | }
|
---|
| 2053 |
|
---|
| 2054 | env->num_mem = env->num_named;
|
---|
| 2055 | reg->num_mem = env->num_named;
|
---|
| 2056 |
|
---|
| 2057 | return onig_renumber_name_table(reg, map);
|
---|
| 2058 | }
|
---|
| 2059 | #endif /* USE_NAMED_GROUP */
|
---|
| 2060 |
|
---|
| 2061 | #ifdef USE_SUBEXP_CALL
|
---|
| 2062 | static int
|
---|
| 2063 | unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
|
---|
| 2064 | {
|
---|
| 2065 | int i, offset;
|
---|
| 2066 | EncloseNode* en;
|
---|
| 2067 | AbsAddrType addr;
|
---|
| 2068 |
|
---|
| 2069 | for (i = 0; i < uslist->num; i++) {
|
---|
| 2070 | en = NENCLOSE(uslist->us[i].target);
|
---|
| 2071 | if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
|
---|
| 2072 | addr = en->call_addr;
|
---|
| 2073 | offset = uslist->us[i].offset;
|
---|
| 2074 |
|
---|
| 2075 | BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
|
---|
| 2076 | }
|
---|
| 2077 | return 0;
|
---|
| 2078 | }
|
---|
| 2079 | #endif
|
---|
| 2080 |
|
---|
| 2081 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
| 2082 | static int
|
---|
| 2083 | quantifiers_memory_node_info(Node* node)
|
---|
| 2084 | {
|
---|
| 2085 | int r = 0;
|
---|
| 2086 |
|
---|
| 2087 | switch (NTYPE(node)) {
|
---|
| 2088 | case NT_LIST:
|
---|
| 2089 | case NT_ALT:
|
---|
| 2090 | {
|
---|
| 2091 | int v;
|
---|
| 2092 | do {
|
---|
| 2093 | v = quantifiers_memory_node_info(NCAR(node));
|
---|
| 2094 | if (v > r) r = v;
|
---|
| 2095 | } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2096 | }
|
---|
| 2097 | break;
|
---|
| 2098 |
|
---|
[331] | 2099 | # ifdef USE_SUBEXP_CALL
|
---|
[279] | 2100 | case NT_CALL:
|
---|
| 2101 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
| 2102 | return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
|
---|
| 2103 | }
|
---|
| 2104 | else
|
---|
| 2105 | r = quantifiers_memory_node_info(NCALL(node)->target);
|
---|
| 2106 | break;
|
---|
[331] | 2107 | # endif
|
---|
[279] | 2108 |
|
---|
| 2109 | case NT_QTFR:
|
---|
| 2110 | {
|
---|
| 2111 | QtfrNode* qn = NQTFR(node);
|
---|
| 2112 | if (qn->upper != 0) {
|
---|
| 2113 | r = quantifiers_memory_node_info(qn->target);
|
---|
| 2114 | }
|
---|
| 2115 | }
|
---|
| 2116 | break;
|
---|
| 2117 |
|
---|
| 2118 | case NT_ENCLOSE:
|
---|
| 2119 | {
|
---|
| 2120 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2121 | switch (en->type) {
|
---|
| 2122 | case ENCLOSE_MEMORY:
|
---|
| 2123 | return NQ_TARGET_IS_EMPTY_MEM;
|
---|
| 2124 | break;
|
---|
| 2125 |
|
---|
| 2126 | case ENCLOSE_OPTION:
|
---|
| 2127 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2128 | case ENCLOSE_CONDITION:
|
---|
[331] | 2129 | case ENCLOSE_ABSENT:
|
---|
[279] | 2130 | r = quantifiers_memory_node_info(en->target);
|
---|
| 2131 | break;
|
---|
| 2132 | default:
|
---|
| 2133 | break;
|
---|
| 2134 | }
|
---|
| 2135 | }
|
---|
| 2136 | break;
|
---|
| 2137 |
|
---|
| 2138 | case NT_BREF:
|
---|
| 2139 | case NT_STR:
|
---|
| 2140 | case NT_CTYPE:
|
---|
| 2141 | case NT_CCLASS:
|
---|
| 2142 | case NT_CANY:
|
---|
| 2143 | case NT_ANCHOR:
|
---|
| 2144 | default:
|
---|
| 2145 | break;
|
---|
| 2146 | }
|
---|
| 2147 |
|
---|
| 2148 | return r;
|
---|
| 2149 | }
|
---|
| 2150 | #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
|
---|
| 2151 |
|
---|
| 2152 | static int
|
---|
| 2153 | get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
|
---|
| 2154 | {
|
---|
| 2155 | OnigDistance tmin;
|
---|
| 2156 | int r = 0;
|
---|
| 2157 |
|
---|
| 2158 | *min = 0;
|
---|
| 2159 | switch (NTYPE(node)) {
|
---|
| 2160 | case NT_BREF:
|
---|
| 2161 | {
|
---|
| 2162 | int i;
|
---|
| 2163 | int* backs;
|
---|
| 2164 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
| 2165 | BRefNode* br = NBREF(node);
|
---|
| 2166 | if (br->state & NST_RECURSION) break;
|
---|
| 2167 |
|
---|
| 2168 | backs = BACKREFS_P(br);
|
---|
| 2169 | if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 2170 | r = get_min_match_length(nodes[backs[0]], min, env);
|
---|
| 2171 | if (r != 0) break;
|
---|
| 2172 | for (i = 1; i < br->back_num; i++) {
|
---|
| 2173 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 2174 | r = get_min_match_length(nodes[backs[i]], &tmin, env);
|
---|
| 2175 | if (r != 0) break;
|
---|
| 2176 | if (*min > tmin) *min = tmin;
|
---|
| 2177 | }
|
---|
| 2178 | }
|
---|
| 2179 | break;
|
---|
| 2180 |
|
---|
| 2181 | #ifdef USE_SUBEXP_CALL
|
---|
| 2182 | case NT_CALL:
|
---|
| 2183 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
| 2184 | EncloseNode* en = NENCLOSE(NCALL(node)->target);
|
---|
| 2185 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
| 2186 | *min = en->min_len;
|
---|
| 2187 | }
|
---|
| 2188 | else
|
---|
| 2189 | r = get_min_match_length(NCALL(node)->target, min, env);
|
---|
| 2190 | break;
|
---|
| 2191 | #endif
|
---|
| 2192 |
|
---|
| 2193 | case NT_LIST:
|
---|
| 2194 | do {
|
---|
| 2195 | r = get_min_match_length(NCAR(node), &tmin, env);
|
---|
| 2196 | if (r == 0) *min += tmin;
|
---|
| 2197 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2198 | break;
|
---|
| 2199 |
|
---|
| 2200 | case NT_ALT:
|
---|
| 2201 | {
|
---|
| 2202 | Node *x, *y;
|
---|
| 2203 | y = node;
|
---|
| 2204 | do {
|
---|
| 2205 | x = NCAR(y);
|
---|
| 2206 | r = get_min_match_length(x, &tmin, env);
|
---|
| 2207 | if (r != 0) break;
|
---|
| 2208 | if (y == node) *min = tmin;
|
---|
| 2209 | else if (*min > tmin) *min = tmin;
|
---|
| 2210 | } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
|
---|
| 2211 | }
|
---|
| 2212 | break;
|
---|
| 2213 |
|
---|
| 2214 | case NT_STR:
|
---|
| 2215 | {
|
---|
| 2216 | StrNode* sn = NSTR(node);
|
---|
| 2217 | *min = sn->end - sn->s;
|
---|
| 2218 | }
|
---|
| 2219 | break;
|
---|
| 2220 |
|
---|
| 2221 | case NT_CTYPE:
|
---|
| 2222 | *min = 1;
|
---|
| 2223 | break;
|
---|
| 2224 |
|
---|
| 2225 | case NT_CCLASS:
|
---|
| 2226 | case NT_CANY:
|
---|
| 2227 | *min = 1;
|
---|
| 2228 | break;
|
---|
| 2229 |
|
---|
| 2230 | case NT_QTFR:
|
---|
| 2231 | {
|
---|
| 2232 | QtfrNode* qn = NQTFR(node);
|
---|
| 2233 |
|
---|
| 2234 | if (qn->lower > 0) {
|
---|
| 2235 | r = get_min_match_length(qn->target, min, env);
|
---|
| 2236 | if (r == 0)
|
---|
| 2237 | *min = distance_multiply(*min, qn->lower);
|
---|
| 2238 | }
|
---|
| 2239 | }
|
---|
| 2240 | break;
|
---|
| 2241 |
|
---|
| 2242 | case NT_ENCLOSE:
|
---|
| 2243 | {
|
---|
| 2244 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2245 | switch (en->type) {
|
---|
| 2246 | case ENCLOSE_MEMORY:
|
---|
[331] | 2247 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
| 2248 | *min = en->min_len;
|
---|
| 2249 | else {
|
---|
| 2250 | if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
| 2251 | *min = 0; /* recursive */
|
---|
| 2252 | else {
|
---|
| 2253 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 2254 | r = get_min_match_length(en->target, min, env);
|
---|
| 2255 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 2256 | if (r == 0) {
|
---|
| 2257 | en->min_len = *min;
|
---|
| 2258 | SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
|
---|
| 2259 | }
|
---|
[279] | 2260 | }
|
---|
[331] | 2261 | }
|
---|
| 2262 | break;
|
---|
| 2263 |
|
---|
[279] | 2264 | case ENCLOSE_OPTION:
|
---|
| 2265 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2266 | case ENCLOSE_CONDITION:
|
---|
| 2267 | r = get_min_match_length(en->target, min, env);
|
---|
| 2268 | break;
|
---|
[331] | 2269 |
|
---|
| 2270 | case ENCLOSE_ABSENT:
|
---|
| 2271 | break;
|
---|
[279] | 2272 | }
|
---|
| 2273 | }
|
---|
| 2274 | break;
|
---|
| 2275 |
|
---|
| 2276 | case NT_ANCHOR:
|
---|
| 2277 | default:
|
---|
| 2278 | break;
|
---|
| 2279 | }
|
---|
| 2280 |
|
---|
| 2281 | return r;
|
---|
| 2282 | }
|
---|
| 2283 |
|
---|
| 2284 | static int
|
---|
| 2285 | get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
|
---|
| 2286 | {
|
---|
| 2287 | OnigDistance tmax;
|
---|
| 2288 | int r = 0;
|
---|
| 2289 |
|
---|
| 2290 | *max = 0;
|
---|
| 2291 | switch (NTYPE(node)) {
|
---|
| 2292 | case NT_LIST:
|
---|
| 2293 | do {
|
---|
| 2294 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
| 2295 | if (r == 0)
|
---|
| 2296 | *max = distance_add(*max, tmax);
|
---|
| 2297 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2298 | break;
|
---|
| 2299 |
|
---|
| 2300 | case NT_ALT:
|
---|
| 2301 | do {
|
---|
| 2302 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
| 2303 | if (r == 0 && *max < tmax) *max = tmax;
|
---|
| 2304 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2305 | break;
|
---|
| 2306 |
|
---|
| 2307 | case NT_STR:
|
---|
| 2308 | {
|
---|
| 2309 | StrNode* sn = NSTR(node);
|
---|
| 2310 | *max = sn->end - sn->s;
|
---|
| 2311 | }
|
---|
| 2312 | break;
|
---|
| 2313 |
|
---|
| 2314 | case NT_CTYPE:
|
---|
| 2315 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 2316 | break;
|
---|
| 2317 |
|
---|
| 2318 | case NT_CCLASS:
|
---|
| 2319 | case NT_CANY:
|
---|
| 2320 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 2321 | break;
|
---|
| 2322 |
|
---|
| 2323 | case NT_BREF:
|
---|
| 2324 | {
|
---|
| 2325 | int i;
|
---|
| 2326 | int* backs;
|
---|
| 2327 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
| 2328 | BRefNode* br = NBREF(node);
|
---|
| 2329 | if (br->state & NST_RECURSION) {
|
---|
| 2330 | *max = ONIG_INFINITE_DISTANCE;
|
---|
| 2331 | break;
|
---|
| 2332 | }
|
---|
| 2333 | backs = BACKREFS_P(br);
|
---|
| 2334 | for (i = 0; i < br->back_num; i++) {
|
---|
| 2335 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 2336 | r = get_max_match_length(nodes[backs[i]], &tmax, env);
|
---|
| 2337 | if (r != 0) break;
|
---|
| 2338 | if (*max < tmax) *max = tmax;
|
---|
| 2339 | }
|
---|
| 2340 | }
|
---|
| 2341 | break;
|
---|
| 2342 |
|
---|
| 2343 | #ifdef USE_SUBEXP_CALL
|
---|
| 2344 | case NT_CALL:
|
---|
| 2345 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
| 2346 | r = get_max_match_length(NCALL(node)->target, max, env);
|
---|
| 2347 | else
|
---|
| 2348 | *max = ONIG_INFINITE_DISTANCE;
|
---|
| 2349 | break;
|
---|
| 2350 | #endif
|
---|
| 2351 |
|
---|
| 2352 | case NT_QTFR:
|
---|
| 2353 | {
|
---|
| 2354 | QtfrNode* qn = NQTFR(node);
|
---|
| 2355 |
|
---|
| 2356 | if (qn->upper != 0) {
|
---|
| 2357 | r = get_max_match_length(qn->target, max, env);
|
---|
| 2358 | if (r == 0 && *max != 0) {
|
---|
| 2359 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
| 2360 | *max = distance_multiply(*max, qn->upper);
|
---|
| 2361 | else
|
---|
| 2362 | *max = ONIG_INFINITE_DISTANCE;
|
---|
| 2363 | }
|
---|
| 2364 | }
|
---|
| 2365 | }
|
---|
| 2366 | break;
|
---|
| 2367 |
|
---|
| 2368 | case NT_ENCLOSE:
|
---|
| 2369 | {
|
---|
| 2370 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2371 | switch (en->type) {
|
---|
| 2372 | case ENCLOSE_MEMORY:
|
---|
| 2373 | if (IS_ENCLOSE_MAX_FIXED(en))
|
---|
| 2374 | *max = en->max_len;
|
---|
| 2375 | else {
|
---|
[331] | 2376 | if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
| 2377 | *max = ONIG_INFINITE_DISTANCE;
|
---|
| 2378 | else {
|
---|
| 2379 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 2380 | r = get_max_match_length(en->target, max, env);
|
---|
| 2381 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 2382 | if (r == 0) {
|
---|
| 2383 | en->max_len = *max;
|
---|
| 2384 | SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
|
---|
| 2385 | }
|
---|
[279] | 2386 | }
|
---|
| 2387 | }
|
---|
| 2388 | break;
|
---|
[331] | 2389 |
|
---|
[279] | 2390 | case ENCLOSE_OPTION:
|
---|
| 2391 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2392 | case ENCLOSE_CONDITION:
|
---|
| 2393 | r = get_max_match_length(en->target, max, env);
|
---|
| 2394 | break;
|
---|
[331] | 2395 |
|
---|
| 2396 | case ENCLOSE_ABSENT:
|
---|
| 2397 | break;
|
---|
[279] | 2398 | }
|
---|
| 2399 | }
|
---|
| 2400 | break;
|
---|
| 2401 |
|
---|
| 2402 | case NT_ANCHOR:
|
---|
| 2403 | default:
|
---|
| 2404 | break;
|
---|
| 2405 | }
|
---|
| 2406 |
|
---|
| 2407 | return r;
|
---|
| 2408 | }
|
---|
| 2409 |
|
---|
| 2410 | #define GET_CHAR_LEN_VARLEN -1
|
---|
| 2411 | #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
|
---|
| 2412 |
|
---|
| 2413 | /* fixed size pattern node only */
|
---|
| 2414 | static int
|
---|
| 2415 | get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
|
---|
| 2416 | {
|
---|
| 2417 | int tlen;
|
---|
| 2418 | int r = 0;
|
---|
| 2419 |
|
---|
| 2420 | level++;
|
---|
| 2421 | *len = 0;
|
---|
| 2422 | switch (NTYPE(node)) {
|
---|
| 2423 | case NT_LIST:
|
---|
| 2424 | do {
|
---|
| 2425 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
| 2426 | if (r == 0)
|
---|
| 2427 | *len = (int )distance_add(*len, tlen);
|
---|
| 2428 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2429 | break;
|
---|
| 2430 |
|
---|
| 2431 | case NT_ALT:
|
---|
| 2432 | {
|
---|
| 2433 | int tlen2;
|
---|
| 2434 | int varlen = 0;
|
---|
| 2435 |
|
---|
| 2436 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
| 2437 | while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
|
---|
| 2438 | r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
|
---|
| 2439 | if (r == 0) {
|
---|
| 2440 | if (tlen != tlen2)
|
---|
| 2441 | varlen = 1;
|
---|
| 2442 | }
|
---|
| 2443 | }
|
---|
| 2444 | if (r == 0) {
|
---|
| 2445 | if (varlen != 0) {
|
---|
| 2446 | if (level == 1)
|
---|
| 2447 | r = GET_CHAR_LEN_TOP_ALT_VARLEN;
|
---|
| 2448 | else
|
---|
| 2449 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2450 | }
|
---|
| 2451 | else
|
---|
| 2452 | *len = tlen;
|
---|
| 2453 | }
|
---|
| 2454 | }
|
---|
| 2455 | break;
|
---|
| 2456 |
|
---|
| 2457 | case NT_STR:
|
---|
| 2458 | {
|
---|
| 2459 | StrNode* sn = NSTR(node);
|
---|
| 2460 | UChar *s = sn->s;
|
---|
| 2461 | while (s < sn->end) {
|
---|
[331] | 2462 | s += enclen(reg->enc, s, sn->end);
|
---|
[279] | 2463 | (*len)++;
|
---|
| 2464 | }
|
---|
| 2465 | }
|
---|
| 2466 | break;
|
---|
| 2467 |
|
---|
| 2468 | case NT_QTFR:
|
---|
| 2469 | {
|
---|
| 2470 | QtfrNode* qn = NQTFR(node);
|
---|
| 2471 | if (qn->lower == qn->upper) {
|
---|
| 2472 | r = get_char_length_tree1(qn->target, reg, &tlen, level);
|
---|
| 2473 | if (r == 0)
|
---|
| 2474 | *len = (int )distance_multiply(tlen, qn->lower);
|
---|
| 2475 | }
|
---|
| 2476 | else
|
---|
| 2477 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2478 | }
|
---|
| 2479 | break;
|
---|
| 2480 |
|
---|
| 2481 | #ifdef USE_SUBEXP_CALL
|
---|
| 2482 | case NT_CALL:
|
---|
| 2483 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
| 2484 | r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
|
---|
| 2485 | else
|
---|
| 2486 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2487 | break;
|
---|
| 2488 | #endif
|
---|
| 2489 |
|
---|
| 2490 | case NT_CTYPE:
|
---|
| 2491 | *len = 1;
|
---|
| 2492 | break;
|
---|
| 2493 |
|
---|
| 2494 | case NT_CCLASS:
|
---|
| 2495 | case NT_CANY:
|
---|
| 2496 | *len = 1;
|
---|
| 2497 | break;
|
---|
| 2498 |
|
---|
| 2499 | case NT_ENCLOSE:
|
---|
| 2500 | {
|
---|
| 2501 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2502 | switch (en->type) {
|
---|
| 2503 | case ENCLOSE_MEMORY:
|
---|
| 2504 | #ifdef USE_SUBEXP_CALL
|
---|
| 2505 | if (IS_ENCLOSE_CLEN_FIXED(en))
|
---|
| 2506 | *len = en->char_len;
|
---|
| 2507 | else {
|
---|
| 2508 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
| 2509 | if (r == 0) {
|
---|
| 2510 | en->char_len = *len;
|
---|
| 2511 | SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
|
---|
| 2512 | }
|
---|
| 2513 | }
|
---|
| 2514 | break;
|
---|
| 2515 | #endif
|
---|
| 2516 | case ENCLOSE_OPTION:
|
---|
| 2517 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2518 | case ENCLOSE_CONDITION:
|
---|
| 2519 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
| 2520 | break;
|
---|
[331] | 2521 | case ENCLOSE_ABSENT:
|
---|
[279] | 2522 | default:
|
---|
| 2523 | break;
|
---|
| 2524 | }
|
---|
| 2525 | }
|
---|
| 2526 | break;
|
---|
| 2527 |
|
---|
| 2528 | case NT_ANCHOR:
|
---|
| 2529 | break;
|
---|
| 2530 |
|
---|
| 2531 | default:
|
---|
| 2532 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2533 | break;
|
---|
| 2534 | }
|
---|
| 2535 |
|
---|
| 2536 | return r;
|
---|
| 2537 | }
|
---|
| 2538 |
|
---|
| 2539 | static int
|
---|
| 2540 | get_char_length_tree(Node* node, regex_t* reg, int* len)
|
---|
| 2541 | {
|
---|
| 2542 | return get_char_length_tree1(node, reg, len, 0);
|
---|
| 2543 | }
|
---|
| 2544 |
|
---|
| 2545 | /* x is not included y ==> 1 : 0 */
|
---|
| 2546 | static int
|
---|
| 2547 | is_not_included(Node* x, Node* y, regex_t* reg)
|
---|
| 2548 | {
|
---|
| 2549 | int i;
|
---|
| 2550 | OnigDistance len;
|
---|
| 2551 | OnigCodePoint code;
|
---|
| 2552 | UChar *p;
|
---|
| 2553 | int ytype;
|
---|
| 2554 |
|
---|
| 2555 | retry:
|
---|
| 2556 | ytype = NTYPE(y);
|
---|
| 2557 | switch (NTYPE(x)) {
|
---|
| 2558 | case NT_CTYPE:
|
---|
| 2559 | {
|
---|
| 2560 | switch (ytype) {
|
---|
| 2561 | case NT_CTYPE:
|
---|
| 2562 | if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
|
---|
| 2563 | NCTYPE(y)->not != NCTYPE(x)->not &&
|
---|
| 2564 | NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
|
---|
| 2565 | return 1;
|
---|
| 2566 | else
|
---|
| 2567 | return 0;
|
---|
| 2568 | break;
|
---|
| 2569 |
|
---|
| 2570 | case NT_CCLASS:
|
---|
| 2571 | swap:
|
---|
| 2572 | {
|
---|
| 2573 | Node* tmp;
|
---|
| 2574 | tmp = x; x = y; y = tmp;
|
---|
| 2575 | goto retry;
|
---|
| 2576 | }
|
---|
| 2577 | break;
|
---|
| 2578 |
|
---|
| 2579 | case NT_STR:
|
---|
| 2580 | goto swap;
|
---|
| 2581 | break;
|
---|
| 2582 |
|
---|
| 2583 | default:
|
---|
| 2584 | break;
|
---|
| 2585 | }
|
---|
| 2586 | }
|
---|
| 2587 | break;
|
---|
| 2588 |
|
---|
| 2589 | case NT_CCLASS:
|
---|
| 2590 | {
|
---|
| 2591 | CClassNode* xc = NCCLASS(x);
|
---|
| 2592 | switch (ytype) {
|
---|
| 2593 | case NT_CTYPE:
|
---|
| 2594 | switch (NCTYPE(y)->ctype) {
|
---|
| 2595 | case ONIGENC_CTYPE_WORD:
|
---|
| 2596 | if (NCTYPE(y)->not == 0) {
|
---|
| 2597 | if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
|
---|
| 2598 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 2599 | if (BITSET_AT(xc->bs, i)) {
|
---|
| 2600 | if (NCTYPE(y)->ascii_range) {
|
---|
| 2601 | if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
|
---|
| 2602 | }
|
---|
| 2603 | else {
|
---|
| 2604 | if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
|
---|
| 2605 | }
|
---|
| 2606 | }
|
---|
| 2607 | }
|
---|
| 2608 | return 1;
|
---|
| 2609 | }
|
---|
| 2610 | return 0;
|
---|
| 2611 | }
|
---|
| 2612 | else {
|
---|
| 2613 | if (IS_NOT_NULL(xc->mbuf)) return 0;
|
---|
| 2614 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 2615 | int is_word;
|
---|
| 2616 | if (NCTYPE(y)->ascii_range)
|
---|
| 2617 | is_word = IS_CODE_SB_WORD(reg->enc, i);
|
---|
| 2618 | else
|
---|
| 2619 | is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
|
---|
| 2620 | if (! is_word) {
|
---|
| 2621 | if (!IS_NCCLASS_NOT(xc)) {
|
---|
| 2622 | if (BITSET_AT(xc->bs, i))
|
---|
| 2623 | return 0;
|
---|
| 2624 | }
|
---|
| 2625 | else {
|
---|
| 2626 | if (! BITSET_AT(xc->bs, i))
|
---|
| 2627 | return 0;
|
---|
| 2628 | }
|
---|
| 2629 | }
|
---|
| 2630 | }
|
---|
| 2631 | return 1;
|
---|
| 2632 | }
|
---|
| 2633 | break;
|
---|
| 2634 |
|
---|
| 2635 | default:
|
---|
| 2636 | break;
|
---|
| 2637 | }
|
---|
| 2638 | break;
|
---|
| 2639 |
|
---|
| 2640 | case NT_CCLASS:
|
---|
| 2641 | {
|
---|
| 2642 | int v;
|
---|
| 2643 | CClassNode* yc = NCCLASS(y);
|
---|
| 2644 |
|
---|
| 2645 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 2646 | v = BITSET_AT(xc->bs, i);
|
---|
| 2647 | if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
|
---|
[331] | 2648 | (v == 0 && IS_NCCLASS_NOT(xc))) {
|
---|
[279] | 2649 | v = BITSET_AT(yc->bs, i);
|
---|
| 2650 | if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
|
---|
[331] | 2651 | (v == 0 && IS_NCCLASS_NOT(yc)))
|
---|
[279] | 2652 | return 0;
|
---|
| 2653 | }
|
---|
| 2654 | }
|
---|
| 2655 | if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
|
---|
| 2656 | (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
|
---|
| 2657 | return 1;
|
---|
| 2658 | return 0;
|
---|
| 2659 | }
|
---|
| 2660 | break;
|
---|
| 2661 |
|
---|
| 2662 | case NT_STR:
|
---|
| 2663 | goto swap;
|
---|
| 2664 | break;
|
---|
| 2665 |
|
---|
| 2666 | default:
|
---|
| 2667 | break;
|
---|
| 2668 | }
|
---|
| 2669 | }
|
---|
| 2670 | break;
|
---|
| 2671 |
|
---|
| 2672 | case NT_STR:
|
---|
| 2673 | {
|
---|
| 2674 | StrNode* xs = NSTR(x);
|
---|
| 2675 | if (NSTRING_LEN(x) == 0)
|
---|
| 2676 | break;
|
---|
| 2677 |
|
---|
| 2678 | switch (ytype) {
|
---|
| 2679 | case NT_CTYPE:
|
---|
| 2680 | switch (NCTYPE(y)->ctype) {
|
---|
| 2681 | case ONIGENC_CTYPE_WORD:
|
---|
| 2682 | if (NCTYPE(y)->ascii_range) {
|
---|
| 2683 | if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
|
---|
| 2684 | return NCTYPE(y)->not;
|
---|
| 2685 | else
|
---|
| 2686 | return !(NCTYPE(y)->not);
|
---|
| 2687 | }
|
---|
| 2688 | else {
|
---|
| 2689 | if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
|
---|
| 2690 | return NCTYPE(y)->not;
|
---|
| 2691 | else
|
---|
| 2692 | return !(NCTYPE(y)->not);
|
---|
| 2693 | }
|
---|
| 2694 | break;
|
---|
| 2695 | default:
|
---|
| 2696 | break;
|
---|
| 2697 | }
|
---|
| 2698 | break;
|
---|
| 2699 |
|
---|
| 2700 | case NT_CCLASS:
|
---|
[331] | 2701 | {
|
---|
| 2702 | CClassNode* cc = NCCLASS(y);
|
---|
[279] | 2703 |
|
---|
[331] | 2704 | code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
|
---|
| 2705 | xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
|
---|
| 2706 | return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
|
---|
| 2707 | }
|
---|
| 2708 | break;
|
---|
[279] | 2709 |
|
---|
| 2710 | case NT_STR:
|
---|
[331] | 2711 | {
|
---|
| 2712 | UChar *q;
|
---|
| 2713 | StrNode* ys = NSTR(y);
|
---|
| 2714 | len = NSTRING_LEN(x);
|
---|
| 2715 | if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
|
---|
| 2716 | if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
|
---|
| 2717 | /* tiny version */
|
---|
| 2718 | return 0;
|
---|
[279] | 2719 | }
|
---|
| 2720 | else {
|
---|
| 2721 | for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
|
---|
| 2722 | if (*p != *q) return 1;
|
---|
| 2723 | }
|
---|
| 2724 | }
|
---|
| 2725 | }
|
---|
| 2726 | break;
|
---|
| 2727 |
|
---|
| 2728 | default:
|
---|
[331] | 2729 | break;
|
---|
[279] | 2730 | }
|
---|
| 2731 | }
|
---|
| 2732 | break;
|
---|
| 2733 |
|
---|
| 2734 | default:
|
---|
| 2735 | break;
|
---|
| 2736 | }
|
---|
| 2737 |
|
---|
| 2738 | return 0;
|
---|
| 2739 | }
|
---|
| 2740 |
|
---|
| 2741 | static Node*
|
---|
| 2742 | get_head_value_node(Node* node, int exact, regex_t* reg)
|
---|
| 2743 | {
|
---|
| 2744 | Node* n = NULL_NODE;
|
---|
| 2745 |
|
---|
| 2746 | switch (NTYPE(node)) {
|
---|
| 2747 | case NT_BREF:
|
---|
| 2748 | case NT_ALT:
|
---|
| 2749 | case NT_CANY:
|
---|
| 2750 | #ifdef USE_SUBEXP_CALL
|
---|
| 2751 | case NT_CALL:
|
---|
| 2752 | #endif
|
---|
| 2753 | break;
|
---|
| 2754 |
|
---|
| 2755 | case NT_CTYPE:
|
---|
| 2756 | case NT_CCLASS:
|
---|
| 2757 | if (exact == 0) {
|
---|
| 2758 | n = node;
|
---|
| 2759 | }
|
---|
| 2760 | break;
|
---|
| 2761 |
|
---|
| 2762 | case NT_LIST:
|
---|
| 2763 | n = get_head_value_node(NCAR(node), exact, reg);
|
---|
| 2764 | break;
|
---|
| 2765 |
|
---|
| 2766 | case NT_STR:
|
---|
| 2767 | {
|
---|
| 2768 | StrNode* sn = NSTR(node);
|
---|
| 2769 |
|
---|
| 2770 | if (sn->end <= sn->s)
|
---|
| 2771 | break;
|
---|
| 2772 |
|
---|
| 2773 | if (exact != 0 &&
|
---|
| 2774 | !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
|
---|
| 2775 | }
|
---|
| 2776 | else {
|
---|
| 2777 | n = node;
|
---|
| 2778 | }
|
---|
| 2779 | }
|
---|
| 2780 | break;
|
---|
| 2781 |
|
---|
| 2782 | case NT_QTFR:
|
---|
| 2783 | {
|
---|
| 2784 | QtfrNode* qn = NQTFR(node);
|
---|
| 2785 | if (qn->lower > 0) {
|
---|
[331] | 2786 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
[279] | 2787 | if (IS_NOT_NULL(qn->head_exact))
|
---|
| 2788 | n = qn->head_exact;
|
---|
| 2789 | else
|
---|
[331] | 2790 | #endif
|
---|
[279] | 2791 | n = get_head_value_node(qn->target, exact, reg);
|
---|
| 2792 | }
|
---|
| 2793 | }
|
---|
| 2794 | break;
|
---|
| 2795 |
|
---|
| 2796 | case NT_ENCLOSE:
|
---|
| 2797 | {
|
---|
| 2798 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2799 | switch (en->type) {
|
---|
| 2800 | case ENCLOSE_OPTION:
|
---|
| 2801 | {
|
---|
| 2802 | OnigOptionType options = reg->options;
|
---|
| 2803 |
|
---|
| 2804 | reg->options = NENCLOSE(node)->option;
|
---|
| 2805 | n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
|
---|
| 2806 | reg->options = options;
|
---|
| 2807 | }
|
---|
| 2808 | break;
|
---|
| 2809 |
|
---|
| 2810 | case ENCLOSE_MEMORY:
|
---|
| 2811 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2812 | case ENCLOSE_CONDITION:
|
---|
| 2813 | n = get_head_value_node(en->target, exact, reg);
|
---|
| 2814 | break;
|
---|
[331] | 2815 |
|
---|
| 2816 | case ENCLOSE_ABSENT:
|
---|
| 2817 | break;
|
---|
[279] | 2818 | }
|
---|
| 2819 | }
|
---|
| 2820 | break;
|
---|
| 2821 |
|
---|
| 2822 | case NT_ANCHOR:
|
---|
| 2823 | if (NANCHOR(node)->type == ANCHOR_PREC_READ)
|
---|
| 2824 | n = get_head_value_node(NANCHOR(node)->target, exact, reg);
|
---|
| 2825 | break;
|
---|
| 2826 |
|
---|
| 2827 | default:
|
---|
| 2828 | break;
|
---|
| 2829 | }
|
---|
| 2830 |
|
---|
| 2831 | return n;
|
---|
| 2832 | }
|
---|
| 2833 |
|
---|
| 2834 | static int
|
---|
| 2835 | check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
|
---|
| 2836 | {
|
---|
| 2837 | int type, r = 0;
|
---|
| 2838 |
|
---|
| 2839 | type = NTYPE(node);
|
---|
| 2840 | if ((NTYPE2BIT(type) & type_mask) == 0)
|
---|
| 2841 | return 1;
|
---|
| 2842 |
|
---|
| 2843 | switch (type) {
|
---|
| 2844 | case NT_LIST:
|
---|
| 2845 | case NT_ALT:
|
---|
| 2846 | do {
|
---|
| 2847 | r = check_type_tree(NCAR(node), type_mask, enclose_mask,
|
---|
| 2848 | anchor_mask);
|
---|
| 2849 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2850 | break;
|
---|
| 2851 |
|
---|
| 2852 | case NT_QTFR:
|
---|
| 2853 | r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
|
---|
| 2854 | anchor_mask);
|
---|
| 2855 | break;
|
---|
| 2856 |
|
---|
| 2857 | case NT_ENCLOSE:
|
---|
| 2858 | {
|
---|
| 2859 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2860 | if ((en->type & enclose_mask) == 0)
|
---|
| 2861 | return 1;
|
---|
| 2862 |
|
---|
| 2863 | r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
|
---|
| 2864 | }
|
---|
| 2865 | break;
|
---|
| 2866 |
|
---|
| 2867 | case NT_ANCHOR:
|
---|
| 2868 | type = NANCHOR(node)->type;
|
---|
| 2869 | if ((type & anchor_mask) == 0)
|
---|
| 2870 | return 1;
|
---|
| 2871 |
|
---|
| 2872 | if (NANCHOR(node)->target)
|
---|
| 2873 | r = check_type_tree(NANCHOR(node)->target,
|
---|
| 2874 | type_mask, enclose_mask, anchor_mask);
|
---|
| 2875 | break;
|
---|
| 2876 |
|
---|
| 2877 | default:
|
---|
| 2878 | break;
|
---|
| 2879 | }
|
---|
| 2880 | return r;
|
---|
| 2881 | }
|
---|
| 2882 |
|
---|
| 2883 | #ifdef USE_SUBEXP_CALL
|
---|
| 2884 |
|
---|
[331] | 2885 | # define RECURSION_EXIST 1
|
---|
| 2886 | # define RECURSION_INFINITE 2
|
---|
[279] | 2887 |
|
---|
| 2888 | static int
|
---|
| 2889 | subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
|
---|
| 2890 | {
|
---|
| 2891 | int type;
|
---|
| 2892 | int r = 0;
|
---|
| 2893 |
|
---|
| 2894 | type = NTYPE(node);
|
---|
| 2895 | switch (type) {
|
---|
| 2896 | case NT_LIST:
|
---|
| 2897 | {
|
---|
| 2898 | Node *x;
|
---|
| 2899 | OnigDistance min;
|
---|
| 2900 | int ret;
|
---|
| 2901 |
|
---|
| 2902 | x = node;
|
---|
| 2903 | do {
|
---|
| 2904 | ret = subexp_inf_recursive_check(NCAR(x), env, head);
|
---|
| 2905 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
| 2906 | r |= ret;
|
---|
| 2907 | if (head) {
|
---|
| 2908 | ret = get_min_match_length(NCAR(x), &min, env);
|
---|
| 2909 | if (ret != 0) return ret;
|
---|
| 2910 | if (min != 0) head = 0;
|
---|
| 2911 | }
|
---|
| 2912 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
| 2913 | }
|
---|
| 2914 | break;
|
---|
| 2915 |
|
---|
| 2916 | case NT_ALT:
|
---|
| 2917 | {
|
---|
| 2918 | int ret;
|
---|
| 2919 | r = RECURSION_EXIST;
|
---|
| 2920 | do {
|
---|
| 2921 | ret = subexp_inf_recursive_check(NCAR(node), env, head);
|
---|
| 2922 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
| 2923 | r &= ret;
|
---|
| 2924 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2925 | }
|
---|
| 2926 | break;
|
---|
| 2927 |
|
---|
| 2928 | case NT_QTFR:
|
---|
| 2929 | r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
|
---|
| 2930 | if (r == RECURSION_EXIST) {
|
---|
| 2931 | if (NQTFR(node)->lower == 0) r = 0;
|
---|
| 2932 | }
|
---|
| 2933 | break;
|
---|
| 2934 |
|
---|
| 2935 | case NT_ANCHOR:
|
---|
| 2936 | {
|
---|
| 2937 | AnchorNode* an = NANCHOR(node);
|
---|
| 2938 | switch (an->type) {
|
---|
| 2939 | case ANCHOR_PREC_READ:
|
---|
| 2940 | case ANCHOR_PREC_READ_NOT:
|
---|
| 2941 | case ANCHOR_LOOK_BEHIND:
|
---|
| 2942 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 2943 | r = subexp_inf_recursive_check(an->target, env, head);
|
---|
| 2944 | break;
|
---|
| 2945 | }
|
---|
| 2946 | }
|
---|
| 2947 | break;
|
---|
| 2948 |
|
---|
| 2949 | case NT_CALL:
|
---|
| 2950 | r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
|
---|
| 2951 | break;
|
---|
| 2952 |
|
---|
| 2953 | case NT_ENCLOSE:
|
---|
| 2954 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
| 2955 | return 0;
|
---|
| 2956 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
| 2957 | return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
|
---|
| 2958 | else {
|
---|
| 2959 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 2960 | r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
|
---|
| 2961 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 2962 | }
|
---|
| 2963 | break;
|
---|
| 2964 |
|
---|
| 2965 | default:
|
---|
| 2966 | break;
|
---|
| 2967 | }
|
---|
| 2968 |
|
---|
| 2969 | return r;
|
---|
| 2970 | }
|
---|
| 2971 |
|
---|
| 2972 | static int
|
---|
| 2973 | subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
| 2974 | {
|
---|
| 2975 | int type;
|
---|
| 2976 | int r = 0;
|
---|
| 2977 |
|
---|
| 2978 | type = NTYPE(node);
|
---|
| 2979 | switch (type) {
|
---|
| 2980 | case NT_LIST:
|
---|
| 2981 | case NT_ALT:
|
---|
| 2982 | do {
|
---|
| 2983 | r = subexp_inf_recursive_check_trav(NCAR(node), env);
|
---|
| 2984 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2985 | break;
|
---|
| 2986 |
|
---|
| 2987 | case NT_QTFR:
|
---|
| 2988 | r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
|
---|
| 2989 | break;
|
---|
| 2990 |
|
---|
| 2991 | case NT_ANCHOR:
|
---|
| 2992 | {
|
---|
| 2993 | AnchorNode* an = NANCHOR(node);
|
---|
| 2994 | switch (an->type) {
|
---|
| 2995 | case ANCHOR_PREC_READ:
|
---|
| 2996 | case ANCHOR_PREC_READ_NOT:
|
---|
| 2997 | case ANCHOR_LOOK_BEHIND:
|
---|
| 2998 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 2999 | r = subexp_inf_recursive_check_trav(an->target, env);
|
---|
| 3000 | break;
|
---|
| 3001 | }
|
---|
| 3002 | }
|
---|
| 3003 | break;
|
---|
| 3004 |
|
---|
| 3005 | case NT_ENCLOSE:
|
---|
| 3006 | {
|
---|
| 3007 | EncloseNode* en = NENCLOSE(node);
|
---|
| 3008 |
|
---|
| 3009 | if (IS_ENCLOSE_RECURSION(en)) {
|
---|
| 3010 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 3011 | r = subexp_inf_recursive_check(en->target, env, 1);
|
---|
| 3012 | if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
|
---|
| 3013 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 3014 | }
|
---|
| 3015 | r = subexp_inf_recursive_check_trav(en->target, env);
|
---|
| 3016 | }
|
---|
| 3017 |
|
---|
| 3018 | break;
|
---|
| 3019 |
|
---|
| 3020 | default:
|
---|
| 3021 | break;
|
---|
| 3022 | }
|
---|
| 3023 |
|
---|
| 3024 | return r;
|
---|
| 3025 | }
|
---|
| 3026 |
|
---|
| 3027 | static int
|
---|
| 3028 | subexp_recursive_check(Node* node)
|
---|
| 3029 | {
|
---|
| 3030 | int r = 0;
|
---|
| 3031 |
|
---|
| 3032 | switch (NTYPE(node)) {
|
---|
| 3033 | case NT_LIST:
|
---|
| 3034 | case NT_ALT:
|
---|
| 3035 | do {
|
---|
| 3036 | r |= subexp_recursive_check(NCAR(node));
|
---|
| 3037 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3038 | break;
|
---|
| 3039 |
|
---|
| 3040 | case NT_QTFR:
|
---|
| 3041 | r = subexp_recursive_check(NQTFR(node)->target);
|
---|
| 3042 | break;
|
---|
| 3043 |
|
---|
| 3044 | case NT_ANCHOR:
|
---|
| 3045 | {
|
---|
| 3046 | AnchorNode* an = NANCHOR(node);
|
---|
| 3047 | switch (an->type) {
|
---|
| 3048 | case ANCHOR_PREC_READ:
|
---|
| 3049 | case ANCHOR_PREC_READ_NOT:
|
---|
| 3050 | case ANCHOR_LOOK_BEHIND:
|
---|
| 3051 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 3052 | r = subexp_recursive_check(an->target);
|
---|
| 3053 | break;
|
---|
| 3054 | }
|
---|
| 3055 | }
|
---|
| 3056 | break;
|
---|
| 3057 |
|
---|
| 3058 | case NT_CALL:
|
---|
| 3059 | r = subexp_recursive_check(NCALL(node)->target);
|
---|
| 3060 | if (r != 0) SET_CALL_RECURSION(node);
|
---|
| 3061 | break;
|
---|
| 3062 |
|
---|
| 3063 | case NT_ENCLOSE:
|
---|
| 3064 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
| 3065 | return 0;
|
---|
| 3066 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
| 3067 | return 1; /* recursion */
|
---|
| 3068 | else {
|
---|
| 3069 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 3070 | r = subexp_recursive_check(NENCLOSE(node)->target);
|
---|
| 3071 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 3072 | }
|
---|
| 3073 | break;
|
---|
| 3074 |
|
---|
| 3075 | default:
|
---|
| 3076 | break;
|
---|
| 3077 | }
|
---|
| 3078 |
|
---|
| 3079 | return r;
|
---|
| 3080 | }
|
---|
| 3081 |
|
---|
| 3082 |
|
---|
| 3083 | static int
|
---|
| 3084 | subexp_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
| 3085 | {
|
---|
[331] | 3086 | # define FOUND_CALLED_NODE 1
|
---|
[279] | 3087 |
|
---|
| 3088 | int type;
|
---|
| 3089 | int r = 0;
|
---|
| 3090 |
|
---|
| 3091 | type = NTYPE(node);
|
---|
| 3092 | switch (type) {
|
---|
| 3093 | case NT_LIST:
|
---|
| 3094 | case NT_ALT:
|
---|
| 3095 | {
|
---|
| 3096 | int ret;
|
---|
| 3097 | do {
|
---|
| 3098 | ret = subexp_recursive_check_trav(NCAR(node), env);
|
---|
| 3099 | if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
|
---|
| 3100 | else if (ret < 0) return ret;
|
---|
| 3101 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3102 | }
|
---|
| 3103 | break;
|
---|
| 3104 |
|
---|
| 3105 | case NT_QTFR:
|
---|
| 3106 | r = subexp_recursive_check_trav(NQTFR(node)->target, env);
|
---|
| 3107 | if (NQTFR(node)->upper == 0) {
|
---|
| 3108 | if (r == FOUND_CALLED_NODE)
|
---|
| 3109 | NQTFR(node)->is_refered = 1;
|
---|
| 3110 | }
|
---|
| 3111 | break;
|
---|
| 3112 |
|
---|
| 3113 | case NT_ANCHOR:
|
---|
| 3114 | {
|
---|
| 3115 | AnchorNode* an = NANCHOR(node);
|
---|
| 3116 | switch (an->type) {
|
---|
| 3117 | case ANCHOR_PREC_READ:
|
---|
| 3118 | case ANCHOR_PREC_READ_NOT:
|
---|
| 3119 | case ANCHOR_LOOK_BEHIND:
|
---|
| 3120 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 3121 | r = subexp_recursive_check_trav(an->target, env);
|
---|
| 3122 | break;
|
---|
| 3123 | }
|
---|
| 3124 | }
|
---|
| 3125 | break;
|
---|
| 3126 |
|
---|
| 3127 | case NT_ENCLOSE:
|
---|
| 3128 | {
|
---|
| 3129 | EncloseNode* en = NENCLOSE(node);
|
---|
| 3130 |
|
---|
| 3131 | if (! IS_ENCLOSE_RECURSION(en)) {
|
---|
| 3132 | if (IS_ENCLOSE_CALLED(en)) {
|
---|
| 3133 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 3134 | r = subexp_recursive_check(en->target);
|
---|
| 3135 | if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
|
---|
| 3136 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 3137 | }
|
---|
| 3138 | }
|
---|
| 3139 | r = subexp_recursive_check_trav(en->target, env);
|
---|
| 3140 | if (IS_ENCLOSE_CALLED(en))
|
---|
| 3141 | r |= FOUND_CALLED_NODE;
|
---|
| 3142 | }
|
---|
| 3143 | break;
|
---|
| 3144 |
|
---|
| 3145 | default:
|
---|
| 3146 | break;
|
---|
| 3147 | }
|
---|
| 3148 |
|
---|
| 3149 | return r;
|
---|
| 3150 | }
|
---|
| 3151 |
|
---|
| 3152 | static int
|
---|
| 3153 | setup_subexp_call(Node* node, ScanEnv* env)
|
---|
| 3154 | {
|
---|
| 3155 | int type;
|
---|
| 3156 | int r = 0;
|
---|
| 3157 |
|
---|
| 3158 | type = NTYPE(node);
|
---|
| 3159 | switch (type) {
|
---|
| 3160 | case NT_LIST:
|
---|
| 3161 | do {
|
---|
| 3162 | r = setup_subexp_call(NCAR(node), env);
|
---|
| 3163 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3164 | break;
|
---|
| 3165 |
|
---|
| 3166 | case NT_ALT:
|
---|
| 3167 | do {
|
---|
| 3168 | r = setup_subexp_call(NCAR(node), env);
|
---|
| 3169 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3170 | break;
|
---|
| 3171 |
|
---|
| 3172 | case NT_QTFR:
|
---|
| 3173 | r = setup_subexp_call(NQTFR(node)->target, env);
|
---|
| 3174 | break;
|
---|
| 3175 | case NT_ENCLOSE:
|
---|
| 3176 | r = setup_subexp_call(NENCLOSE(node)->target, env);
|
---|
| 3177 | break;
|
---|
| 3178 |
|
---|
| 3179 | case NT_CALL:
|
---|
| 3180 | {
|
---|
| 3181 | CallNode* cn = NCALL(node);
|
---|
| 3182 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
| 3183 |
|
---|
| 3184 | if (cn->group_num != 0) {
|
---|
| 3185 | int gnum = cn->group_num;
|
---|
| 3186 |
|
---|
[331] | 3187 | # ifdef USE_NAMED_GROUP
|
---|
[279] | 3188 | if (env->num_named > 0 &&
|
---|
| 3189 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
| 3190 | !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
| 3191 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
| 3192 | }
|
---|
[331] | 3193 | # endif
|
---|
[279] | 3194 | if (gnum > env->num_mem) {
|
---|
| 3195 | onig_scan_env_set_error_string(env,
|
---|
| 3196 | ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
|
---|
| 3197 | return ONIGERR_UNDEFINED_GROUP_REFERENCE;
|
---|
| 3198 | }
|
---|
| 3199 |
|
---|
[331] | 3200 | # ifdef USE_NAMED_GROUP
|
---|
[279] | 3201 | set_call_attr:
|
---|
[331] | 3202 | # endif
|
---|
[279] | 3203 | cn->target = nodes[cn->group_num];
|
---|
| 3204 | if (IS_NULL(cn->target)) {
|
---|
| 3205 | onig_scan_env_set_error_string(env,
|
---|
| 3206 | ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
|
---|
| 3207 | return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
---|
| 3208 | }
|
---|
| 3209 | SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
|
---|
| 3210 | BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
|
---|
| 3211 | cn->unset_addr_list = env->unset_addr_list;
|
---|
| 3212 | }
|
---|
[331] | 3213 | # ifdef USE_NAMED_GROUP
|
---|
| 3214 | # ifdef USE_PERL_SUBEXP_CALL
|
---|
[279] | 3215 | else if (cn->name == cn->name_end) {
|
---|
| 3216 | goto set_call_attr;
|
---|
| 3217 | }
|
---|
[331] | 3218 | # endif
|
---|
[279] | 3219 | else {
|
---|
| 3220 | int *refs;
|
---|
| 3221 |
|
---|
| 3222 | int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
|
---|
| 3223 | &refs);
|
---|
| 3224 | if (n <= 0) {
|
---|
| 3225 | onig_scan_env_set_error_string(env,
|
---|
| 3226 | ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
|
---|
| 3227 | return ONIGERR_UNDEFINED_NAME_REFERENCE;
|
---|
| 3228 | }
|
---|
| 3229 | else if (n > 1 &&
|
---|
| 3230 | ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
|
---|
| 3231 | onig_scan_env_set_error_string(env,
|
---|
| 3232 | ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
|
---|
| 3233 | return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
|
---|
| 3234 | }
|
---|
| 3235 | else {
|
---|
| 3236 | cn->group_num = refs[0];
|
---|
| 3237 | goto set_call_attr;
|
---|
| 3238 | }
|
---|
| 3239 | }
|
---|
[331] | 3240 | # endif
|
---|
[279] | 3241 | }
|
---|
| 3242 | break;
|
---|
| 3243 |
|
---|
| 3244 | case NT_ANCHOR:
|
---|
| 3245 | {
|
---|
| 3246 | AnchorNode* an = NANCHOR(node);
|
---|
| 3247 |
|
---|
| 3248 | switch (an->type) {
|
---|
| 3249 | case ANCHOR_PREC_READ:
|
---|
| 3250 | case ANCHOR_PREC_READ_NOT:
|
---|
| 3251 | case ANCHOR_LOOK_BEHIND:
|
---|
| 3252 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 3253 | r = setup_subexp_call(an->target, env);
|
---|
| 3254 | break;
|
---|
| 3255 | }
|
---|
| 3256 | }
|
---|
| 3257 | break;
|
---|
| 3258 |
|
---|
| 3259 | default:
|
---|
| 3260 | break;
|
---|
| 3261 | }
|
---|
| 3262 |
|
---|
| 3263 | return r;
|
---|
| 3264 | }
|
---|
| 3265 | #endif
|
---|
| 3266 |
|
---|
| 3267 | /* divide different length alternatives in look-behind.
|
---|
| 3268 | (?<=A|B) ==> (?<=A)|(?<=B)
|
---|
| 3269 | (?<!A|B) ==> (?<!A)(?<!B)
|
---|
| 3270 | */
|
---|
| 3271 | static int
|
---|
| 3272 | divide_look_behind_alternatives(Node* node)
|
---|
| 3273 | {
|
---|
| 3274 | Node *head, *np, *insert_node;
|
---|
| 3275 | AnchorNode* an = NANCHOR(node);
|
---|
| 3276 | int anc_type = an->type;
|
---|
| 3277 |
|
---|
| 3278 | head = an->target;
|
---|
| 3279 | np = NCAR(head);
|
---|
| 3280 | swap_node(node, head);
|
---|
| 3281 | NCAR(node) = head;
|
---|
| 3282 | NANCHOR(head)->target = np;
|
---|
| 3283 |
|
---|
| 3284 | np = node;
|
---|
| 3285 | while ((np = NCDR(np)) != NULL_NODE) {
|
---|
| 3286 | insert_node = onig_node_new_anchor(anc_type);
|
---|
| 3287 | CHECK_NULL_RETURN_MEMERR(insert_node);
|
---|
| 3288 | NANCHOR(insert_node)->target = NCAR(np);
|
---|
| 3289 | NCAR(np) = insert_node;
|
---|
| 3290 | }
|
---|
| 3291 |
|
---|
| 3292 | if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
|
---|
| 3293 | np = node;
|
---|
| 3294 | do {
|
---|
| 3295 | SET_NTYPE(np, NT_LIST); /* alt -> list */
|
---|
| 3296 | } while ((np = NCDR(np)) != NULL_NODE);
|
---|
| 3297 | }
|
---|
| 3298 | return 0;
|
---|
| 3299 | }
|
---|
| 3300 |
|
---|
| 3301 | static int
|
---|
| 3302 | setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
|
---|
| 3303 | {
|
---|
| 3304 | int r, len;
|
---|
| 3305 | AnchorNode* an = NANCHOR(node);
|
---|
| 3306 |
|
---|
| 3307 | r = get_char_length_tree(an->target, reg, &len);
|
---|
| 3308 | if (r == 0)
|
---|
| 3309 | an->char_len = len;
|
---|
| 3310 | else if (r == GET_CHAR_LEN_VARLEN)
|
---|
| 3311 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 3312 | else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
|
---|
| 3313 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
|
---|
| 3314 | r = divide_look_behind_alternatives(node);
|
---|
| 3315 | else
|
---|
| 3316 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 3317 | }
|
---|
| 3318 |
|
---|
| 3319 | return r;
|
---|
| 3320 | }
|
---|
| 3321 |
|
---|
| 3322 | static int
|
---|
[331] | 3323 | next_setup(Node* node, Node* next_node, regex_t* reg)
|
---|
[279] | 3324 | {
|
---|
| 3325 | int type;
|
---|
| 3326 |
|
---|
| 3327 | retry:
|
---|
| 3328 | type = NTYPE(node);
|
---|
| 3329 | if (type == NT_QTFR) {
|
---|
| 3330 | QtfrNode* qn = NQTFR(node);
|
---|
| 3331 | if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 3332 | #ifdef USE_QTFR_PEEK_NEXT
|
---|
| 3333 | Node* n = get_head_value_node(next_node, 1, reg);
|
---|
| 3334 | /* '\0': for UTF-16BE etc... */
|
---|
| 3335 | if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
|
---|
| 3336 | qn->next_head_exact = n;
|
---|
| 3337 | }
|
---|
| 3338 | #endif
|
---|
| 3339 | /* automatic possessification a*b ==> (?>a*)b */
|
---|
| 3340 | if (qn->lower <= 1) {
|
---|
| 3341 | int ttype = NTYPE(qn->target);
|
---|
| 3342 | if (IS_NODE_TYPE_SIMPLE(ttype)) {
|
---|
| 3343 | Node *x, *y;
|
---|
| 3344 | x = get_head_value_node(qn->target, 0, reg);
|
---|
| 3345 | if (IS_NOT_NULL(x)) {
|
---|
| 3346 | y = get_head_value_node(next_node, 0, reg);
|
---|
| 3347 | if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
|
---|
| 3348 | Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
|
---|
| 3349 | CHECK_NULL_RETURN_MEMERR(en);
|
---|
| 3350 | SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
|
---|
| 3351 | swap_node(node, en);
|
---|
| 3352 | NENCLOSE(node)->target = en;
|
---|
| 3353 | }
|
---|
| 3354 | }
|
---|
| 3355 | }
|
---|
| 3356 | }
|
---|
| 3357 | }
|
---|
| 3358 | }
|
---|
| 3359 | else if (type == NT_ENCLOSE) {
|
---|
| 3360 | EncloseNode* en = NENCLOSE(node);
|
---|
| 3361 | if (en->type == ENCLOSE_MEMORY) {
|
---|
| 3362 | node = en->target;
|
---|
| 3363 | goto retry;
|
---|
| 3364 | }
|
---|
| 3365 | }
|
---|
| 3366 | return 0;
|
---|
| 3367 | }
|
---|
| 3368 |
|
---|
| 3369 |
|
---|
| 3370 | static int
|
---|
| 3371 | update_string_node_case_fold(regex_t* reg, Node *node)
|
---|
| 3372 | {
|
---|
| 3373 | UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
| 3374 | UChar *sbuf, *ebuf, *sp;
|
---|
| 3375 | int r, i, len;
|
---|
| 3376 | OnigDistance sbuf_size;
|
---|
| 3377 | StrNode* sn = NSTR(node);
|
---|
| 3378 |
|
---|
| 3379 | end = sn->end;
|
---|
| 3380 | sbuf_size = (end - sn->s) * 2;
|
---|
| 3381 | sbuf = (UChar* )xmalloc(sbuf_size);
|
---|
| 3382 | CHECK_NULL_RETURN_MEMERR(sbuf);
|
---|
| 3383 | ebuf = sbuf + sbuf_size;
|
---|
| 3384 |
|
---|
| 3385 | sp = sbuf;
|
---|
| 3386 | p = sn->s;
|
---|
| 3387 | while (p < end) {
|
---|
| 3388 | len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
|
---|
| 3389 | for (i = 0; i < len; i++) {
|
---|
| 3390 | if (sp >= ebuf) {
|
---|
| 3391 | UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
|
---|
| 3392 | if (IS_NULL(p)) {
|
---|
| 3393 | xfree(sbuf);
|
---|
| 3394 | return ONIGERR_MEMORY;
|
---|
| 3395 | }
|
---|
| 3396 | sbuf = p;
|
---|
| 3397 | sp = sbuf + sbuf_size;
|
---|
| 3398 | sbuf_size *= 2;
|
---|
| 3399 | ebuf = sbuf + sbuf_size;
|
---|
| 3400 | }
|
---|
| 3401 |
|
---|
| 3402 | *sp++ = buf[i];
|
---|
| 3403 | }
|
---|
| 3404 | }
|
---|
| 3405 |
|
---|
| 3406 | r = onig_node_str_set(node, sbuf, sp);
|
---|
| 3407 |
|
---|
| 3408 | xfree(sbuf);
|
---|
| 3409 | return r;
|
---|
| 3410 | }
|
---|
| 3411 |
|
---|
| 3412 | static int
|
---|
| 3413 | expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
|
---|
| 3414 | regex_t* reg)
|
---|
| 3415 | {
|
---|
| 3416 | int r;
|
---|
| 3417 | Node *node;
|
---|
| 3418 |
|
---|
| 3419 | node = onig_node_new_str(s, end);
|
---|
| 3420 | if (IS_NULL(node)) return ONIGERR_MEMORY;
|
---|
| 3421 |
|
---|
| 3422 | r = update_string_node_case_fold(reg, node);
|
---|
| 3423 | if (r != 0) {
|
---|
| 3424 | onig_node_free(node);
|
---|
| 3425 | return r;
|
---|
| 3426 | }
|
---|
| 3427 |
|
---|
| 3428 | NSTRING_SET_AMBIG(node);
|
---|
| 3429 | NSTRING_SET_DONT_GET_OPT_INFO(node);
|
---|
| 3430 | *rnode = node;
|
---|
| 3431 | return 0;
|
---|
| 3432 | }
|
---|
| 3433 |
|
---|
| 3434 | static int
|
---|
| 3435 | is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
|
---|
| 3436 | int slen)
|
---|
| 3437 | {
|
---|
| 3438 | int i;
|
---|
| 3439 |
|
---|
| 3440 | for (i = 0; i < item_num; i++) {
|
---|
| 3441 | if (items[i].byte_len != slen) {
|
---|
| 3442 | return 1;
|
---|
| 3443 | }
|
---|
| 3444 | if (items[i].code_len != 1) {
|
---|
| 3445 | return 1;
|
---|
| 3446 | }
|
---|
| 3447 | }
|
---|
| 3448 | return 0;
|
---|
| 3449 | }
|
---|
| 3450 |
|
---|
| 3451 | static int
|
---|
| 3452 | expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
|
---|
| 3453 | UChar *p, int slen, UChar *end,
|
---|
| 3454 | regex_t* reg, Node **rnode)
|
---|
| 3455 | {
|
---|
| 3456 | int r, i, j, len, varlen;
|
---|
| 3457 | Node *anode, *var_anode, *snode, *xnode, *an;
|
---|
| 3458 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
---|
| 3459 |
|
---|
| 3460 | *rnode = var_anode = NULL_NODE;
|
---|
| 3461 |
|
---|
| 3462 | varlen = 0;
|
---|
| 3463 | for (i = 0; i < item_num; i++) {
|
---|
| 3464 | if (items[i].byte_len != slen) {
|
---|
| 3465 | varlen = 1;
|
---|
| 3466 | break;
|
---|
| 3467 | }
|
---|
| 3468 | }
|
---|
| 3469 |
|
---|
| 3470 | if (varlen != 0) {
|
---|
| 3471 | *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
| 3472 | if (IS_NULL(var_anode)) return ONIGERR_MEMORY;
|
---|
| 3473 |
|
---|
| 3474 | xnode = onig_node_new_list(NULL, NULL);
|
---|
| 3475 | if (IS_NULL(xnode)) goto mem_err;
|
---|
| 3476 | NCAR(var_anode) = xnode;
|
---|
| 3477 |
|
---|
| 3478 | anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
| 3479 | if (IS_NULL(anode)) goto mem_err;
|
---|
| 3480 | NCAR(xnode) = anode;
|
---|
| 3481 | }
|
---|
| 3482 | else {
|
---|
| 3483 | *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
| 3484 | if (IS_NULL(anode)) return ONIGERR_MEMORY;
|
---|
| 3485 | }
|
---|
| 3486 |
|
---|
| 3487 | snode = onig_node_new_str(p, p + slen);
|
---|
| 3488 | if (IS_NULL(snode)) goto mem_err;
|
---|
| 3489 |
|
---|
| 3490 | NCAR(anode) = snode;
|
---|
| 3491 |
|
---|
| 3492 | for (i = 0; i < item_num; i++) {
|
---|
| 3493 | snode = onig_node_new_str(NULL, NULL);
|
---|
| 3494 | if (IS_NULL(snode)) goto mem_err;
|
---|
| 3495 |
|
---|
| 3496 | for (j = 0; j < items[i].code_len; j++) {
|
---|
| 3497 | len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
|
---|
| 3498 | if (len < 0) {
|
---|
| 3499 | r = len;
|
---|
| 3500 | goto mem_err2;
|
---|
| 3501 | }
|
---|
| 3502 |
|
---|
| 3503 | r = onig_node_str_cat(snode, buf, buf + len);
|
---|
| 3504 | if (r != 0) goto mem_err2;
|
---|
| 3505 | }
|
---|
| 3506 |
|
---|
| 3507 | an = onig_node_new_alt(NULL_NODE, NULL_NODE);
|
---|
| 3508 | if (IS_NULL(an)) {
|
---|
| 3509 | goto mem_err2;
|
---|
| 3510 | }
|
---|
| 3511 |
|
---|
| 3512 | if (items[i].byte_len != slen) {
|
---|
| 3513 | Node *rem;
|
---|
| 3514 | UChar *q = p + items[i].byte_len;
|
---|
| 3515 |
|
---|
| 3516 | if (q < end) {
|
---|
[331] | 3517 | r = expand_case_fold_make_rem_string(&rem, q, end, reg);
|
---|
| 3518 | if (r != 0) {
|
---|
| 3519 | onig_node_free(an);
|
---|
| 3520 | goto mem_err2;
|
---|
| 3521 | }
|
---|
[279] | 3522 |
|
---|
[331] | 3523 | xnode = onig_node_list_add(NULL_NODE, snode);
|
---|
| 3524 | if (IS_NULL(xnode)) {
|
---|
| 3525 | onig_node_free(an);
|
---|
| 3526 | onig_node_free(rem);
|
---|
| 3527 | goto mem_err2;
|
---|
| 3528 | }
|
---|
| 3529 | if (IS_NULL(onig_node_list_add(xnode, rem))) {
|
---|
| 3530 | onig_node_free(an);
|
---|
| 3531 | onig_node_free(xnode);
|
---|
| 3532 | onig_node_free(rem);
|
---|
| 3533 | goto mem_err;
|
---|
| 3534 | }
|
---|
[279] | 3535 |
|
---|
[331] | 3536 | NCAR(an) = xnode;
|
---|
[279] | 3537 | }
|
---|
| 3538 | else {
|
---|
[331] | 3539 | NCAR(an) = snode;
|
---|
[279] | 3540 | }
|
---|
| 3541 |
|
---|
| 3542 | NCDR(var_anode) = an;
|
---|
| 3543 | var_anode = an;
|
---|
| 3544 | }
|
---|
| 3545 | else {
|
---|
| 3546 | NCAR(an) = snode;
|
---|
| 3547 | NCDR(anode) = an;
|
---|
| 3548 | anode = an;
|
---|
| 3549 | }
|
---|
| 3550 | }
|
---|
| 3551 |
|
---|
| 3552 | return varlen;
|
---|
| 3553 |
|
---|
| 3554 | mem_err2:
|
---|
| 3555 | onig_node_free(snode);
|
---|
| 3556 |
|
---|
| 3557 | mem_err:
|
---|
| 3558 | onig_node_free(*rnode);
|
---|
| 3559 |
|
---|
| 3560 | return ONIGERR_MEMORY;
|
---|
| 3561 | }
|
---|
| 3562 |
|
---|
| 3563 | static int
|
---|
| 3564 | expand_case_fold_string(Node* node, regex_t* reg)
|
---|
| 3565 | {
|
---|
| 3566 | #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
|
---|
| 3567 |
|
---|
| 3568 | int r, n, len, alt_num;
|
---|
| 3569 | int varlen = 0;
|
---|
| 3570 | UChar *start, *end, *p;
|
---|
| 3571 | Node *top_root, *root, *snode, *prev_node;
|
---|
| 3572 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 3573 | StrNode* sn = NSTR(node);
|
---|
| 3574 |
|
---|
| 3575 | if (NSTRING_IS_AMBIG(node)) return 0;
|
---|
| 3576 |
|
---|
| 3577 | start = sn->s;
|
---|
| 3578 | end = sn->end;
|
---|
| 3579 | if (start >= end) return 0;
|
---|
| 3580 |
|
---|
| 3581 | r = 0;
|
---|
| 3582 | top_root = root = prev_node = snode = NULL_NODE;
|
---|
| 3583 | alt_num = 1;
|
---|
| 3584 | p = start;
|
---|
| 3585 | while (p < end) {
|
---|
| 3586 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
|
---|
| 3587 | p, end, items);
|
---|
| 3588 | if (n < 0) {
|
---|
| 3589 | r = n;
|
---|
| 3590 | goto err;
|
---|
| 3591 | }
|
---|
| 3592 |
|
---|
[331] | 3593 | len = enclen(reg->enc, p, end);
|
---|
[279] | 3594 |
|
---|
| 3595 | varlen = is_case_fold_variable_len(n, items, len);
|
---|
| 3596 | if (n == 0 || varlen == 0) {
|
---|
| 3597 | if (IS_NULL(snode)) {
|
---|
| 3598 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
| 3599 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
| 3600 | if (IS_NULL(root)) {
|
---|
| 3601 | onig_node_free(prev_node);
|
---|
| 3602 | goto mem_err;
|
---|
| 3603 | }
|
---|
| 3604 | }
|
---|
| 3605 |
|
---|
| 3606 | prev_node = snode = onig_node_new_str(NULL, NULL);
|
---|
| 3607 | if (IS_NULL(snode)) goto mem_err;
|
---|
| 3608 | if (IS_NOT_NULL(root)) {
|
---|
| 3609 | if (IS_NULL(onig_node_list_add(root, snode))) {
|
---|
| 3610 | onig_node_free(snode);
|
---|
| 3611 | goto mem_err;
|
---|
| 3612 | }
|
---|
| 3613 | }
|
---|
| 3614 | }
|
---|
| 3615 |
|
---|
| 3616 | r = onig_node_str_cat(snode, p, p + len);
|
---|
| 3617 | if (r != 0) goto err;
|
---|
| 3618 | }
|
---|
| 3619 | else {
|
---|
| 3620 | alt_num *= (n + 1);
|
---|
| 3621 | if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
|
---|
| 3622 |
|
---|
| 3623 | if (IS_NOT_NULL(snode)) {
|
---|
| 3624 | r = update_string_node_case_fold(reg, snode);
|
---|
| 3625 | if (r == 0) {
|
---|
| 3626 | NSTRING_SET_AMBIG(snode);
|
---|
| 3627 | }
|
---|
| 3628 | }
|
---|
| 3629 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
| 3630 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
| 3631 | if (IS_NULL(root)) {
|
---|
| 3632 | onig_node_free(prev_node);
|
---|
| 3633 | goto mem_err;
|
---|
| 3634 | }
|
---|
| 3635 | }
|
---|
| 3636 |
|
---|
| 3637 | r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
|
---|
| 3638 | if (r < 0) goto mem_err;
|
---|
| 3639 | if (r == 1) {
|
---|
| 3640 | if (IS_NULL(root)) {
|
---|
| 3641 | top_root = prev_node;
|
---|
| 3642 | }
|
---|
| 3643 | else {
|
---|
| 3644 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
| 3645 | onig_node_free(prev_node);
|
---|
| 3646 | goto mem_err;
|
---|
| 3647 | }
|
---|
| 3648 | }
|
---|
| 3649 |
|
---|
| 3650 | root = NCAR(prev_node);
|
---|
| 3651 | }
|
---|
| 3652 | else { /* r == 0 */
|
---|
| 3653 | if (IS_NOT_NULL(root)) {
|
---|
| 3654 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
| 3655 | onig_node_free(prev_node);
|
---|
| 3656 | goto mem_err;
|
---|
| 3657 | }
|
---|
| 3658 | }
|
---|
| 3659 | }
|
---|
| 3660 |
|
---|
| 3661 | snode = NULL_NODE;
|
---|
| 3662 | }
|
---|
| 3663 |
|
---|
| 3664 | p += len;
|
---|
| 3665 | }
|
---|
| 3666 | if (IS_NOT_NULL(snode)) {
|
---|
| 3667 | r = update_string_node_case_fold(reg, snode);
|
---|
| 3668 | if (r == 0) {
|
---|
| 3669 | NSTRING_SET_AMBIG(snode);
|
---|
| 3670 | }
|
---|
| 3671 | }
|
---|
| 3672 |
|
---|
| 3673 | if (p < end) {
|
---|
| 3674 | Node *srem;
|
---|
| 3675 |
|
---|
| 3676 | r = expand_case_fold_make_rem_string(&srem, p, end, reg);
|
---|
| 3677 | if (r != 0) goto mem_err;
|
---|
| 3678 |
|
---|
| 3679 | if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
|
---|
| 3680 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
| 3681 | if (IS_NULL(root)) {
|
---|
| 3682 | onig_node_free(srem);
|
---|
| 3683 | onig_node_free(prev_node);
|
---|
| 3684 | goto mem_err;
|
---|
| 3685 | }
|
---|
| 3686 | }
|
---|
| 3687 |
|
---|
| 3688 | if (IS_NULL(root)) {
|
---|
| 3689 | prev_node = srem;
|
---|
| 3690 | }
|
---|
| 3691 | else {
|
---|
| 3692 | if (IS_NULL(onig_node_list_add(root, srem))) {
|
---|
| 3693 | onig_node_free(srem);
|
---|
| 3694 | goto mem_err;
|
---|
| 3695 | }
|
---|
| 3696 | }
|
---|
| 3697 | }
|
---|
| 3698 |
|
---|
| 3699 | /* ending */
|
---|
| 3700 | top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
|
---|
| 3701 | swap_node(node, top_root);
|
---|
| 3702 | onig_node_free(top_root);
|
---|
| 3703 | return 0;
|
---|
| 3704 |
|
---|
| 3705 | mem_err:
|
---|
| 3706 | r = ONIGERR_MEMORY;
|
---|
| 3707 |
|
---|
| 3708 | err:
|
---|
| 3709 | onig_node_free(top_root);
|
---|
| 3710 | return r;
|
---|
| 3711 | }
|
---|
| 3712 |
|
---|
| 3713 |
|
---|
| 3714 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 3715 |
|
---|
[331] | 3716 | # define CEC_THRES_NUM_BIG_REPEAT 512
|
---|
| 3717 | # define CEC_INFINITE_NUM 0x7fffffff
|
---|
[279] | 3718 |
|
---|
[331] | 3719 | # define CEC_IN_INFINITE_REPEAT (1<<0)
|
---|
| 3720 | # define CEC_IN_FINITE_REPEAT (1<<1)
|
---|
| 3721 | # define CEC_CONT_BIG_REPEAT (1<<2)
|
---|
[279] | 3722 |
|
---|
| 3723 | static int
|
---|
| 3724 | setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
---|
| 3725 | {
|
---|
| 3726 | int type;
|
---|
| 3727 | int r = state;
|
---|
| 3728 |
|
---|
| 3729 | type = NTYPE(node);
|
---|
| 3730 | switch (type) {
|
---|
| 3731 | case NT_LIST:
|
---|
| 3732 | {
|
---|
| 3733 | Node* prev = NULL_NODE;
|
---|
| 3734 | do {
|
---|
| 3735 | r = setup_comb_exp_check(NCAR(node), r, env);
|
---|
| 3736 | prev = NCAR(node);
|
---|
| 3737 | } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3738 | }
|
---|
| 3739 | break;
|
---|
| 3740 |
|
---|
| 3741 | case NT_ALT:
|
---|
| 3742 | {
|
---|
| 3743 | int ret;
|
---|
| 3744 | do {
|
---|
| 3745 | ret = setup_comb_exp_check(NCAR(node), state, env);
|
---|
| 3746 | r |= ret;
|
---|
| 3747 | } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3748 | }
|
---|
| 3749 | break;
|
---|
| 3750 |
|
---|
| 3751 | case NT_QTFR:
|
---|
| 3752 | {
|
---|
| 3753 | int child_state = state;
|
---|
| 3754 | int add_state = 0;
|
---|
| 3755 | QtfrNode* qn = NQTFR(node);
|
---|
| 3756 | Node* target = qn->target;
|
---|
| 3757 | int var_num;
|
---|
| 3758 |
|
---|
| 3759 | if (! IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 3760 | if (qn->upper > 1) {
|
---|
| 3761 | /* {0,1}, {1,1} are allowed */
|
---|
| 3762 | child_state |= CEC_IN_FINITE_REPEAT;
|
---|
| 3763 |
|
---|
| 3764 | /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
|
---|
| 3765 | if (env->backrefed_mem == 0) {
|
---|
| 3766 | if (NTYPE(qn->target) == NT_ENCLOSE) {
|
---|
| 3767 | EncloseNode* en = NENCLOSE(qn->target);
|
---|
| 3768 | if (en->type == ENCLOSE_MEMORY) {
|
---|
| 3769 | if (NTYPE(en->target) == NT_QTFR) {
|
---|
| 3770 | QtfrNode* q = NQTFR(en->target);
|
---|
| 3771 | if (IS_REPEAT_INFINITE(q->upper)
|
---|
| 3772 | && q->greedy == qn->greedy) {
|
---|
| 3773 | qn->upper = (qn->lower == 0 ? 1 : qn->lower);
|
---|
| 3774 | if (qn->upper == 1)
|
---|
| 3775 | child_state = state;
|
---|
| 3776 | }
|
---|
| 3777 | }
|
---|
| 3778 | }
|
---|
| 3779 | }
|
---|
| 3780 | }
|
---|
| 3781 | }
|
---|
| 3782 | }
|
---|
| 3783 |
|
---|
| 3784 | if (state & CEC_IN_FINITE_REPEAT) {
|
---|
| 3785 | qn->comb_exp_check_num = -1;
|
---|
| 3786 | }
|
---|
| 3787 | else {
|
---|
| 3788 | if (IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 3789 | var_num = CEC_INFINITE_NUM;
|
---|
| 3790 | child_state |= CEC_IN_INFINITE_REPEAT;
|
---|
| 3791 | }
|
---|
| 3792 | else {
|
---|
| 3793 | var_num = qn->upper - qn->lower;
|
---|
| 3794 | }
|
---|
| 3795 |
|
---|
| 3796 | if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
|
---|
| 3797 | add_state |= CEC_CONT_BIG_REPEAT;
|
---|
| 3798 |
|
---|
| 3799 | if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
|
---|
| 3800 | ((state & CEC_CONT_BIG_REPEAT) != 0 &&
|
---|
| 3801 | var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
|
---|
| 3802 | if (qn->comb_exp_check_num == 0) {
|
---|
| 3803 | env->num_comb_exp_check++;
|
---|
| 3804 | qn->comb_exp_check_num = env->num_comb_exp_check;
|
---|
| 3805 | if (env->curr_max_regnum > env->comb_exp_max_regnum)
|
---|
| 3806 | env->comb_exp_max_regnum = env->curr_max_regnum;
|
---|
| 3807 | }
|
---|
| 3808 | }
|
---|
| 3809 | }
|
---|
| 3810 |
|
---|
| 3811 | r = setup_comb_exp_check(target, child_state, env);
|
---|
| 3812 | r |= add_state;
|
---|
| 3813 | }
|
---|
| 3814 | break;
|
---|
| 3815 |
|
---|
| 3816 | case NT_ENCLOSE:
|
---|
| 3817 | {
|
---|
| 3818 | EncloseNode* en = NENCLOSE(node);
|
---|
| 3819 |
|
---|
| 3820 | switch (en->type) {
|
---|
| 3821 | case ENCLOSE_MEMORY:
|
---|
| 3822 | {
|
---|
| 3823 | if (env->curr_max_regnum < en->regnum)
|
---|
| 3824 | env->curr_max_regnum = en->regnum;
|
---|
| 3825 |
|
---|
| 3826 | r = setup_comb_exp_check(en->target, state, env);
|
---|
| 3827 | }
|
---|
| 3828 | break;
|
---|
| 3829 |
|
---|
| 3830 | default:
|
---|
| 3831 | r = setup_comb_exp_check(en->target, state, env);
|
---|
| 3832 | break;
|
---|
| 3833 | }
|
---|
| 3834 | }
|
---|
| 3835 | break;
|
---|
| 3836 |
|
---|
[331] | 3837 | # ifdef USE_SUBEXP_CALL
|
---|
[279] | 3838 | case NT_CALL:
|
---|
| 3839 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
| 3840 | env->has_recursion = 1;
|
---|
| 3841 | else
|
---|
| 3842 | r = setup_comb_exp_check(NCALL(node)->target, state, env);
|
---|
| 3843 | break;
|
---|
[331] | 3844 | # endif
|
---|
[279] | 3845 |
|
---|
| 3846 | default:
|
---|
| 3847 | break;
|
---|
| 3848 | }
|
---|
| 3849 |
|
---|
| 3850 | return r;
|
---|
| 3851 | }
|
---|
| 3852 | #endif
|
---|
| 3853 |
|
---|
| 3854 | #define IN_ALT (1<<0)
|
---|
| 3855 | #define IN_NOT (1<<1)
|
---|
| 3856 | #define IN_REPEAT (1<<2)
|
---|
| 3857 | #define IN_VAR_REPEAT (1<<3)
|
---|
[331] | 3858 | #define IN_CALL (1<<4)
|
---|
| 3859 | #define IN_RECCALL (1<<5)
|
---|
[279] | 3860 |
|
---|
| 3861 | /* setup_tree does the following work.
|
---|
| 3862 | 1. check empty loop. (set qn->target_empty_info)
|
---|
| 3863 | 2. expand ignore-case in char class.
|
---|
| 3864 | 3. set memory status bit flags. (reg->mem_stats)
|
---|
| 3865 | 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
|
---|
| 3866 | 5. find invalid patterns in look-behind.
|
---|
| 3867 | 6. expand repeated string.
|
---|
| 3868 | */
|
---|
| 3869 | static int
|
---|
| 3870 | setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
---|
| 3871 | {
|
---|
| 3872 | int type;
|
---|
| 3873 | int r = 0;
|
---|
| 3874 |
|
---|
| 3875 | restart:
|
---|
| 3876 | type = NTYPE(node);
|
---|
| 3877 | switch (type) {
|
---|
| 3878 | case NT_LIST:
|
---|
| 3879 | {
|
---|
| 3880 | Node* prev = NULL_NODE;
|
---|
| 3881 | do {
|
---|
| 3882 | r = setup_tree(NCAR(node), reg, state, env);
|
---|
| 3883 | if (IS_NOT_NULL(prev) && r == 0) {
|
---|
[331] | 3884 | r = next_setup(prev, NCAR(node), reg);
|
---|
[279] | 3885 | }
|
---|
| 3886 | prev = NCAR(node);
|
---|
| 3887 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3888 | }
|
---|
| 3889 | break;
|
---|
| 3890 |
|
---|
| 3891 | case NT_ALT:
|
---|
| 3892 | do {
|
---|
| 3893 | r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
|
---|
| 3894 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3895 | break;
|
---|
| 3896 |
|
---|
| 3897 | case NT_CCLASS:
|
---|
| 3898 | break;
|
---|
| 3899 |
|
---|
| 3900 | case NT_STR:
|
---|
| 3901 | if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
|
---|
| 3902 | r = expand_case_fold_string(node, reg);
|
---|
| 3903 | }
|
---|
| 3904 | break;
|
---|
| 3905 |
|
---|
| 3906 | case NT_CTYPE:
|
---|
| 3907 | case NT_CANY:
|
---|
| 3908 | break;
|
---|
| 3909 |
|
---|
| 3910 | #ifdef USE_SUBEXP_CALL
|
---|
| 3911 | case NT_CALL:
|
---|
| 3912 | break;
|
---|
| 3913 | #endif
|
---|
| 3914 |
|
---|
| 3915 | case NT_BREF:
|
---|
| 3916 | {
|
---|
| 3917 | int i;
|
---|
| 3918 | int* p;
|
---|
| 3919 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
| 3920 | BRefNode* br = NBREF(node);
|
---|
| 3921 | p = BACKREFS_P(br);
|
---|
| 3922 | for (i = 0; i < br->back_num; i++) {
|
---|
| 3923 | if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 3924 | BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
|
---|
| 3925 | BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
|
---|
| 3926 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 3927 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
| 3928 | BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
|
---|
| 3929 | }
|
---|
| 3930 | #endif
|
---|
| 3931 | SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
|
---|
| 3932 | }
|
---|
| 3933 | }
|
---|
| 3934 | break;
|
---|
| 3935 |
|
---|
| 3936 | case NT_QTFR:
|
---|
| 3937 | {
|
---|
| 3938 | OnigDistance d;
|
---|
| 3939 | QtfrNode* qn = NQTFR(node);
|
---|
| 3940 | Node* target = qn->target;
|
---|
| 3941 |
|
---|
| 3942 | if ((state & IN_REPEAT) != 0) {
|
---|
[331] | 3943 | qn->state |= NST_IN_REPEAT;
|
---|
[279] | 3944 | }
|
---|
| 3945 |
|
---|
| 3946 | if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
|
---|
| 3947 | r = get_min_match_length(target, &d, env);
|
---|
| 3948 | if (r) break;
|
---|
| 3949 | if (d == 0) {
|
---|
| 3950 | qn->target_empty_info = NQ_TARGET_IS_EMPTY;
|
---|
| 3951 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
| 3952 | r = quantifiers_memory_node_info(target);
|
---|
| 3953 | if (r < 0) break;
|
---|
| 3954 | if (r > 0) {
|
---|
| 3955 | qn->target_empty_info = r;
|
---|
| 3956 | }
|
---|
| 3957 | #endif
|
---|
| 3958 | #if 0
|
---|
| 3959 | r = get_max_match_length(target, &d, env);
|
---|
| 3960 | if (r == 0 && d == 0) {
|
---|
| 3961 | /* ()* ==> ()?, ()+ ==> () */
|
---|
| 3962 | qn->upper = 1;
|
---|
| 3963 | if (qn->lower > 1) qn->lower = 1;
|
---|
| 3964 | if (NTYPE(target) == NT_STR) {
|
---|
| 3965 | qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
|
---|
| 3966 | }
|
---|
| 3967 | }
|
---|
| 3968 | #endif
|
---|
| 3969 | }
|
---|
| 3970 | }
|
---|
| 3971 |
|
---|
| 3972 | state |= IN_REPEAT;
|
---|
| 3973 | if (qn->lower != qn->upper)
|
---|
| 3974 | state |= IN_VAR_REPEAT;
|
---|
| 3975 | r = setup_tree(target, reg, state, env);
|
---|
| 3976 | if (r) break;
|
---|
| 3977 |
|
---|
| 3978 | /* expand string */
|
---|
| 3979 | #define EXPAND_STRING_MAX_LENGTH 100
|
---|
| 3980 | if (NTYPE(target) == NT_STR) {
|
---|
| 3981 | if (qn->lower > 1) {
|
---|
| 3982 | int i, n = qn->lower;
|
---|
| 3983 | OnigDistance len = NSTRING_LEN(target);
|
---|
| 3984 | StrNode* sn = NSTR(target);
|
---|
| 3985 | Node* np;
|
---|
| 3986 |
|
---|
| 3987 | np = onig_node_new_str(sn->s, sn->end);
|
---|
| 3988 | if (IS_NULL(np)) return ONIGERR_MEMORY;
|
---|
| 3989 | NSTR(np)->flag = sn->flag;
|
---|
| 3990 |
|
---|
| 3991 | for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
|
---|
| 3992 | r = onig_node_str_cat(np, sn->s, sn->end);
|
---|
| 3993 | if (r) {
|
---|
| 3994 | onig_node_free(np);
|
---|
| 3995 | return r;
|
---|
| 3996 | }
|
---|
| 3997 | }
|
---|
| 3998 | if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 3999 | Node *np1, *np2;
|
---|
| 4000 |
|
---|
| 4001 | qn->lower -= i;
|
---|
| 4002 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
| 4003 | qn->upper -= i;
|
---|
| 4004 |
|
---|
| 4005 | np1 = onig_node_new_list(np, NULL);
|
---|
| 4006 | if (IS_NULL(np1)) {
|
---|
| 4007 | onig_node_free(np);
|
---|
| 4008 | return ONIGERR_MEMORY;
|
---|
| 4009 | }
|
---|
| 4010 | swap_node(np1, node);
|
---|
| 4011 | np2 = onig_node_list_add(node, np1);
|
---|
| 4012 | if (IS_NULL(np2)) {
|
---|
| 4013 | onig_node_free(np1);
|
---|
| 4014 | return ONIGERR_MEMORY;
|
---|
| 4015 | }
|
---|
| 4016 | }
|
---|
| 4017 | else {
|
---|
| 4018 | swap_node(np, node);
|
---|
| 4019 | onig_node_free(np);
|
---|
| 4020 | }
|
---|
| 4021 | break; /* break case NT_QTFR: */
|
---|
| 4022 | }
|
---|
| 4023 | }
|
---|
| 4024 |
|
---|
| 4025 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
| 4026 | if (qn->greedy && (qn->target_empty_info != 0)) {
|
---|
| 4027 | if (NTYPE(target) == NT_QTFR) {
|
---|
| 4028 | QtfrNode* tqn = NQTFR(target);
|
---|
| 4029 | if (IS_NOT_NULL(tqn->head_exact)) {
|
---|
| 4030 | qn->head_exact = tqn->head_exact;
|
---|
| 4031 | tqn->head_exact = NULL;
|
---|
| 4032 | }
|
---|
| 4033 | }
|
---|
| 4034 | else {
|
---|
| 4035 | qn->head_exact = get_head_value_node(qn->target, 1, reg);
|
---|
| 4036 | }
|
---|
| 4037 | }
|
---|
| 4038 | #endif
|
---|
| 4039 | }
|
---|
| 4040 | break;
|
---|
| 4041 |
|
---|
| 4042 | case NT_ENCLOSE:
|
---|
| 4043 | {
|
---|
| 4044 | EncloseNode* en = NENCLOSE(node);
|
---|
| 4045 |
|
---|
| 4046 | switch (en->type) {
|
---|
| 4047 | case ENCLOSE_OPTION:
|
---|
| 4048 | {
|
---|
| 4049 | OnigOptionType options = reg->options;
|
---|
| 4050 | reg->options = NENCLOSE(node)->option;
|
---|
| 4051 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
| 4052 | reg->options = options;
|
---|
| 4053 | }
|
---|
| 4054 | break;
|
---|
| 4055 |
|
---|
| 4056 | case ENCLOSE_MEMORY:
|
---|
[331] | 4057 | if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT | IN_CALL)) != 0) {
|
---|
[279] | 4058 | BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
|
---|
| 4059 | /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
|
---|
| 4060 | }
|
---|
[331] | 4061 | if (IS_ENCLOSE_CALLED(en))
|
---|
| 4062 | state |= IN_CALL;
|
---|
| 4063 | if (IS_ENCLOSE_RECURSION(en))
|
---|
| 4064 | state |= IN_RECCALL;
|
---|
| 4065 | else if ((state & IN_RECCALL) != 0)
|
---|
| 4066 | SET_CALL_RECURSION(node);
|
---|
| 4067 | r = setup_tree(en->target, reg, state, env);
|
---|
| 4068 | break;
|
---|
[279] | 4069 |
|
---|
| 4070 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 4071 | {
|
---|
| 4072 | Node* target = en->target;
|
---|
| 4073 | r = setup_tree(target, reg, state, env);
|
---|
| 4074 | if (NTYPE(target) == NT_QTFR) {
|
---|
| 4075 | QtfrNode* tqn = NQTFR(target);
|
---|
| 4076 | if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
|
---|
| 4077 | tqn->greedy != 0) { /* (?>a*), a*+ etc... */
|
---|
| 4078 | int qtype = NTYPE(tqn->target);
|
---|
| 4079 | if (IS_NODE_TYPE_SIMPLE(qtype))
|
---|
| 4080 | SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
|
---|
| 4081 | }
|
---|
| 4082 | }
|
---|
| 4083 | }
|
---|
| 4084 | break;
|
---|
| 4085 |
|
---|
| 4086 | case ENCLOSE_CONDITION:
|
---|
| 4087 | #ifdef USE_NAMED_GROUP
|
---|
| 4088 | if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
|
---|
| 4089 | env->num_named > 0 &&
|
---|
| 4090 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
| 4091 | !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
| 4092 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
| 4093 | }
|
---|
| 4094 | #endif
|
---|
[331] | 4095 | if (NENCLOSE(node)->regnum > env->num_mem)
|
---|
| 4096 | return ONIGERR_INVALID_BACKREF;
|
---|
[279] | 4097 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
| 4098 | break;
|
---|
[331] | 4099 |
|
---|
| 4100 | case ENCLOSE_ABSENT:
|
---|
| 4101 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
| 4102 | break;
|
---|
[279] | 4103 | }
|
---|
| 4104 | }
|
---|
| 4105 | break;
|
---|
| 4106 |
|
---|
| 4107 | case NT_ANCHOR:
|
---|
| 4108 | {
|
---|
| 4109 | AnchorNode* an = NANCHOR(node);
|
---|
| 4110 |
|
---|
| 4111 | switch (an->type) {
|
---|
| 4112 | case ANCHOR_PREC_READ:
|
---|
| 4113 | r = setup_tree(an->target, reg, state, env);
|
---|
| 4114 | break;
|
---|
| 4115 | case ANCHOR_PREC_READ_NOT:
|
---|
| 4116 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
| 4117 | break;
|
---|
| 4118 |
|
---|
| 4119 | /* allowed node types in look-behind */
|
---|
| 4120 | #define ALLOWED_TYPE_IN_LB \
|
---|
| 4121 | ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
|
---|
| 4122 | BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
|
---|
| 4123 |
|
---|
| 4124 | #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
|
---|
| 4125 | #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
|
---|
| 4126 |
|
---|
| 4127 | #define ALLOWED_ANCHOR_IN_LB \
|
---|
| 4128 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
| 4129 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
| 4130 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
| 4131 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
| 4132 | #define ALLOWED_ANCHOR_IN_LB_NOT \
|
---|
| 4133 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
| 4134 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
| 4135 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
| 4136 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
| 4137 |
|
---|
| 4138 | case ANCHOR_LOOK_BEHIND:
|
---|
| 4139 | {
|
---|
| 4140 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
| 4141 | ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
|
---|
| 4142 | if (r < 0) return r;
|
---|
| 4143 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 4144 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
| 4145 | r = setup_tree(an->target, reg, state, env);
|
---|
[331] | 4146 | if (r != 0) return r;
|
---|
| 4147 | r = setup_look_behind(node, reg, env);
|
---|
[279] | 4148 | }
|
---|
| 4149 | break;
|
---|
| 4150 |
|
---|
| 4151 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 4152 | {
|
---|
| 4153 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
| 4154 | ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
|
---|
| 4155 | if (r < 0) return r;
|
---|
| 4156 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 4157 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
| 4158 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
[331] | 4159 | if (r != 0) return r;
|
---|
| 4160 | r = setup_look_behind(node, reg, env);
|
---|
[279] | 4161 | }
|
---|
| 4162 | break;
|
---|
| 4163 | }
|
---|
| 4164 | }
|
---|
| 4165 | break;
|
---|
| 4166 |
|
---|
| 4167 | default:
|
---|
| 4168 | break;
|
---|
| 4169 | }
|
---|
| 4170 |
|
---|
| 4171 | return r;
|
---|
| 4172 | }
|
---|
| 4173 |
|
---|
| 4174 | #ifndef USE_SUNDAY_QUICK_SEARCH
|
---|
| 4175 | /* set skip map for Boyer-Moore search */
|
---|
| 4176 | static int
|
---|
| 4177 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
| 4178 | UChar skip[], int** int_skip, int ignore_case)
|
---|
| 4179 | {
|
---|
| 4180 | OnigDistance i, len;
|
---|
| 4181 | int clen, flen, n, j, k;
|
---|
| 4182 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
| 4183 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 4184 | OnigEncoding enc = reg->enc;
|
---|
| 4185 |
|
---|
| 4186 | len = end - s;
|
---|
| 4187 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
| 4188 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
|
---|
| 4189 |
|
---|
| 4190 | n = 0;
|
---|
| 4191 | for (i = 0; i < len - 1; i += clen) {
|
---|
| 4192 | p = s + i;
|
---|
| 4193 | if (ignore_case)
|
---|
| 4194 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4195 | p, end, items);
|
---|
[331] | 4196 | clen = enclen(enc, p, end);
|
---|
| 4197 | if (p + clen > end)
|
---|
| 4198 | clen = (int )(end - p);
|
---|
[279] | 4199 |
|
---|
| 4200 | for (j = 0; j < n; j++) {
|
---|
| 4201 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4202 | return 1; /* different length isn't supported. */
|
---|
| 4203 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4204 | if (flen != clen)
|
---|
| 4205 | return 1; /* different length isn't supported. */
|
---|
| 4206 | }
|
---|
| 4207 | for (j = 0; j < clen; j++) {
|
---|
| 4208 | skip[s[i + j]] = (UChar )(len - 1 - i - j);
|
---|
| 4209 | for (k = 0; k < n; k++) {
|
---|
| 4210 | skip[buf[k][j]] = (UChar )(len - 1 - i - j);
|
---|
| 4211 | }
|
---|
| 4212 | }
|
---|
| 4213 | }
|
---|
| 4214 | }
|
---|
| 4215 | else {
|
---|
[331] | 4216 | # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
|
---|
| 4217 | /* This should not happen. */
|
---|
| 4218 | return ONIGERR_TYPE_BUG;
|
---|
| 4219 | # else
|
---|
[279] | 4220 | if (IS_NULL(*int_skip)) {
|
---|
| 4221 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
| 4222 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
| 4223 | }
|
---|
| 4224 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
|
---|
| 4225 |
|
---|
| 4226 | n = 0;
|
---|
| 4227 | for (i = 0; i < len - 1; i += clen) {
|
---|
| 4228 | p = s + i;
|
---|
| 4229 | if (ignore_case)
|
---|
| 4230 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4231 | p, end, items);
|
---|
[331] | 4232 | clen = enclen(enc, p, end);
|
---|
| 4233 | if (p + clen > end)
|
---|
| 4234 | clen = (int )(end - p);
|
---|
[279] | 4235 |
|
---|
| 4236 | for (j = 0; j < n; j++) {
|
---|
| 4237 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4238 | return 1; /* different length isn't supported. */
|
---|
| 4239 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4240 | if (flen != clen)
|
---|
| 4241 | return 1; /* different length isn't supported. */
|
---|
| 4242 | }
|
---|
| 4243 | for (j = 0; j < clen; j++) {
|
---|
| 4244 | (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
|
---|
| 4245 | for (k = 0; k < n; k++) {
|
---|
| 4246 | (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
|
---|
| 4247 | }
|
---|
| 4248 | }
|
---|
| 4249 | }
|
---|
[331] | 4250 | # endif
|
---|
[279] | 4251 | }
|
---|
| 4252 | return 0;
|
---|
| 4253 | }
|
---|
| 4254 |
|
---|
| 4255 | #else /* USE_SUNDAY_QUICK_SEARCH */
|
---|
| 4256 |
|
---|
| 4257 | /* set skip map for Sunday's quick search */
|
---|
| 4258 | static int
|
---|
| 4259 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
| 4260 | UChar skip[], int** int_skip, int ignore_case)
|
---|
| 4261 | {
|
---|
| 4262 | OnigDistance i, len;
|
---|
| 4263 | int clen, flen, n, j, k;
|
---|
| 4264 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
| 4265 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 4266 | OnigEncoding enc = reg->enc;
|
---|
| 4267 |
|
---|
| 4268 | len = end - s;
|
---|
| 4269 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
| 4270 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
|
---|
| 4271 |
|
---|
| 4272 | n = 0;
|
---|
| 4273 | for (i = 0; i < len; i += clen) {
|
---|
| 4274 | p = s + i;
|
---|
| 4275 | if (ignore_case)
|
---|
| 4276 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4277 | p, end, items);
|
---|
[331] | 4278 | clen = enclen(enc, p, end);
|
---|
| 4279 | if (p + clen > end)
|
---|
| 4280 | clen = (int )(end - p);
|
---|
[279] | 4281 |
|
---|
| 4282 | for (j = 0; j < n; j++) {
|
---|
| 4283 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4284 | return 1; /* different length isn't supported. */
|
---|
| 4285 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4286 | if (flen != clen)
|
---|
| 4287 | return 1; /* different length isn't supported. */
|
---|
| 4288 | }
|
---|
| 4289 | for (j = 0; j < clen; j++) {
|
---|
| 4290 | skip[s[i + j]] = (UChar )(len - i - j);
|
---|
| 4291 | for (k = 0; k < n; k++) {
|
---|
| 4292 | skip[buf[k][j]] = (UChar )(len - i - j);
|
---|
| 4293 | }
|
---|
| 4294 | }
|
---|
| 4295 | }
|
---|
| 4296 | }
|
---|
| 4297 | else {
|
---|
[331] | 4298 | # if OPT_EXACT_MAXLEN < ONIG_CHAR_TABLE_SIZE
|
---|
| 4299 | /* This should not happen. */
|
---|
| 4300 | return ONIGERR_TYPE_BUG;
|
---|
| 4301 | # else
|
---|
[279] | 4302 | if (IS_NULL(*int_skip)) {
|
---|
| 4303 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
| 4304 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
| 4305 | }
|
---|
| 4306 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
|
---|
| 4307 |
|
---|
| 4308 | n = 0;
|
---|
| 4309 | for (i = 0; i < len; i += clen) {
|
---|
| 4310 | p = s + i;
|
---|
| 4311 | if (ignore_case)
|
---|
| 4312 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4313 | p, end, items);
|
---|
[331] | 4314 | clen = enclen(enc, p, end);
|
---|
| 4315 | if (p + clen > end)
|
---|
| 4316 | clen = (int )(end - p);
|
---|
[279] | 4317 |
|
---|
| 4318 | for (j = 0; j < n; j++) {
|
---|
| 4319 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4320 | return 1; /* different length isn't supported. */
|
---|
| 4321 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4322 | if (flen != clen)
|
---|
| 4323 | return 1; /* different length isn't supported. */
|
---|
| 4324 | }
|
---|
| 4325 | for (j = 0; j < clen; j++) {
|
---|
| 4326 | (*int_skip)[s[i + j]] = (int )(len - i - j);
|
---|
| 4327 | for (k = 0; k < n; k++) {
|
---|
| 4328 | (*int_skip)[buf[k][j]] = (int )(len - i - j);
|
---|
| 4329 | }
|
---|
| 4330 | }
|
---|
| 4331 | }
|
---|
[331] | 4332 | # endif
|
---|
[279] | 4333 | }
|
---|
| 4334 | return 0;
|
---|
| 4335 | }
|
---|
| 4336 | #endif /* USE_SUNDAY_QUICK_SEARCH */
|
---|
| 4337 |
|
---|
| 4338 | typedef struct {
|
---|
| 4339 | OnigDistance min; /* min byte length */
|
---|
| 4340 | OnigDistance max; /* max byte length */
|
---|
| 4341 | } MinMaxLen;
|
---|
| 4342 |
|
---|
| 4343 | typedef struct {
|
---|
| 4344 | MinMaxLen mmd;
|
---|
| 4345 | OnigEncoding enc;
|
---|
| 4346 | OnigOptionType options;
|
---|
| 4347 | OnigCaseFoldType case_fold_flag;
|
---|
| 4348 | ScanEnv* scan_env;
|
---|
| 4349 | } OptEnv;
|
---|
| 4350 |
|
---|
| 4351 | typedef struct {
|
---|
| 4352 | int left_anchor;
|
---|
| 4353 | int right_anchor;
|
---|
| 4354 | } OptAncInfo;
|
---|
| 4355 |
|
---|
| 4356 | typedef struct {
|
---|
| 4357 | MinMaxLen mmd; /* info position */
|
---|
| 4358 | OptAncInfo anc;
|
---|
| 4359 |
|
---|
| 4360 | int reach_end;
|
---|
| 4361 | int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
|
---|
| 4362 | int len;
|
---|
| 4363 | UChar s[OPT_EXACT_MAXLEN];
|
---|
| 4364 | } OptExactInfo;
|
---|
| 4365 |
|
---|
| 4366 | typedef struct {
|
---|
| 4367 | MinMaxLen mmd; /* info position */
|
---|
| 4368 | OptAncInfo anc;
|
---|
| 4369 |
|
---|
| 4370 | int value; /* weighted value */
|
---|
| 4371 | UChar map[ONIG_CHAR_TABLE_SIZE];
|
---|
| 4372 | } OptMapInfo;
|
---|
| 4373 |
|
---|
| 4374 | typedef struct {
|
---|
| 4375 | MinMaxLen len;
|
---|
| 4376 |
|
---|
| 4377 | OptAncInfo anc;
|
---|
| 4378 | OptExactInfo exb; /* boundary */
|
---|
| 4379 | OptExactInfo exm; /* middle */
|
---|
| 4380 | OptExactInfo expr; /* prec read (?=...) */
|
---|
| 4381 |
|
---|
| 4382 | OptMapInfo map; /* boundary */
|
---|
| 4383 | } NodeOptInfo;
|
---|
| 4384 |
|
---|
| 4385 |
|
---|
| 4386 | static int
|
---|
| 4387 | map_position_value(OnigEncoding enc, int i)
|
---|
| 4388 | {
|
---|
| 4389 | static const short int ByteValTable[] = {
|
---|
| 4390 | 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
|
---|
| 4391 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
---|
| 4392 | 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
|
---|
| 4393 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
|
---|
| 4394 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
| 4395 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
|
---|
| 4396 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
| 4397 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
---|
| 4398 | };
|
---|
| 4399 |
|
---|
| 4400 | if (i < numberof(ByteValTable)) {
|
---|
| 4401 | if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
|
---|
| 4402 | return 20;
|
---|
| 4403 | else
|
---|
| 4404 | return (int )ByteValTable[i];
|
---|
| 4405 | }
|
---|
| 4406 | else
|
---|
| 4407 | return 4; /* Take it easy. */
|
---|
| 4408 | }
|
---|
| 4409 |
|
---|
| 4410 | static int
|
---|
| 4411 | distance_value(MinMaxLen* mm)
|
---|
| 4412 | {
|
---|
| 4413 | /* 1000 / (min-max-dist + 1) */
|
---|
| 4414 | static const short int dist_vals[] = {
|
---|
| 4415 | 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
|
---|
| 4416 | 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
|
---|
| 4417 | 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
|
---|
| 4418 | 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
|
---|
| 4419 | 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
|
---|
| 4420 | 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
|
---|
| 4421 | 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
|
---|
| 4422 | 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
|
---|
| 4423 | 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
|
---|
| 4424 | 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
|
---|
| 4425 | };
|
---|
| 4426 |
|
---|
| 4427 | OnigDistance d;
|
---|
| 4428 |
|
---|
| 4429 | if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
|
---|
| 4430 |
|
---|
| 4431 | d = mm->max - mm->min;
|
---|
| 4432 | if (d < numberof(dist_vals))
|
---|
| 4433 | /* return dist_vals[d] * 16 / (mm->min + 12); */
|
---|
| 4434 | return (int )dist_vals[d];
|
---|
| 4435 | else
|
---|
| 4436 | return 1;
|
---|
| 4437 | }
|
---|
| 4438 |
|
---|
| 4439 | static int
|
---|
| 4440 | comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
|
---|
| 4441 | {
|
---|
| 4442 | if (v2 <= 0) return -1;
|
---|
| 4443 | if (v1 <= 0) return 1;
|
---|
| 4444 |
|
---|
| 4445 | v1 *= distance_value(d1);
|
---|
| 4446 | v2 *= distance_value(d2);
|
---|
| 4447 |
|
---|
| 4448 | if (v2 > v1) return 1;
|
---|
| 4449 | if (v2 < v1) return -1;
|
---|
| 4450 |
|
---|
| 4451 | if (d2->min < d1->min) return 1;
|
---|
| 4452 | if (d2->min > d1->min) return -1;
|
---|
| 4453 | return 0;
|
---|
| 4454 | }
|
---|
| 4455 |
|
---|
| 4456 | static int
|
---|
| 4457 | is_equal_mml(MinMaxLen* a, MinMaxLen* b)
|
---|
| 4458 | {
|
---|
| 4459 | return (a->min == b->min && a->max == b->max) ? 1 : 0;
|
---|
| 4460 | }
|
---|
| 4461 |
|
---|
| 4462 |
|
---|
| 4463 | static void
|
---|
| 4464 | set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
|
---|
| 4465 | {
|
---|
| 4466 | mml->min = min;
|
---|
| 4467 | mml->max = max;
|
---|
| 4468 | }
|
---|
| 4469 |
|
---|
| 4470 | static void
|
---|
| 4471 | clear_mml(MinMaxLen* mml)
|
---|
| 4472 | {
|
---|
| 4473 | mml->min = mml->max = 0;
|
---|
| 4474 | }
|
---|
| 4475 |
|
---|
| 4476 | static void
|
---|
| 4477 | copy_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
| 4478 | {
|
---|
| 4479 | to->min = from->min;
|
---|
| 4480 | to->max = from->max;
|
---|
| 4481 | }
|
---|
| 4482 |
|
---|
| 4483 | static void
|
---|
| 4484 | add_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
| 4485 | {
|
---|
| 4486 | to->min = distance_add(to->min, from->min);
|
---|
| 4487 | to->max = distance_add(to->max, from->max);
|
---|
| 4488 | }
|
---|
| 4489 |
|
---|
| 4490 | #if 0
|
---|
| 4491 | static void
|
---|
| 4492 | add_len_mml(MinMaxLen* to, OnigDistance len)
|
---|
| 4493 | {
|
---|
| 4494 | to->min = distance_add(to->min, len);
|
---|
| 4495 | to->max = distance_add(to->max, len);
|
---|
| 4496 | }
|
---|
| 4497 | #endif
|
---|
| 4498 |
|
---|
| 4499 | static void
|
---|
| 4500 | alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
| 4501 | {
|
---|
| 4502 | if (to->min > from->min) to->min = from->min;
|
---|
| 4503 | if (to->max < from->max) to->max = from->max;
|
---|
| 4504 | }
|
---|
| 4505 |
|
---|
| 4506 | static void
|
---|
| 4507 | copy_opt_env(OptEnv* to, OptEnv* from)
|
---|
| 4508 | {
|
---|
| 4509 | *to = *from;
|
---|
| 4510 | }
|
---|
| 4511 |
|
---|
| 4512 | static void
|
---|
| 4513 | clear_opt_anc_info(OptAncInfo* anc)
|
---|
| 4514 | {
|
---|
| 4515 | anc->left_anchor = 0;
|
---|
| 4516 | anc->right_anchor = 0;
|
---|
| 4517 | }
|
---|
| 4518 |
|
---|
| 4519 | static void
|
---|
| 4520 | copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
|
---|
| 4521 | {
|
---|
| 4522 | *to = *from;
|
---|
| 4523 | }
|
---|
| 4524 |
|
---|
| 4525 | static void
|
---|
| 4526 | concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
|
---|
| 4527 | OnigDistance left_len, OnigDistance right_len)
|
---|
| 4528 | {
|
---|
| 4529 | clear_opt_anc_info(to);
|
---|
| 4530 |
|
---|
| 4531 | to->left_anchor = left->left_anchor;
|
---|
| 4532 | if (left_len == 0) {
|
---|
| 4533 | to->left_anchor |= right->left_anchor;
|
---|
| 4534 | }
|
---|
| 4535 |
|
---|
| 4536 | to->right_anchor = right->right_anchor;
|
---|
| 4537 | if (right_len == 0) {
|
---|
| 4538 | to->right_anchor |= left->right_anchor;
|
---|
| 4539 | }
|
---|
| 4540 | else {
|
---|
| 4541 | to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
|
---|
| 4542 | }
|
---|
| 4543 | }
|
---|
| 4544 |
|
---|
| 4545 | static int
|
---|
| 4546 | is_left_anchor(int anc)
|
---|
| 4547 | {
|
---|
| 4548 | if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
|
---|
| 4549 | anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
|
---|
| 4550 | anc == ANCHOR_PREC_READ_NOT)
|
---|
| 4551 | return 0;
|
---|
| 4552 |
|
---|
| 4553 | return 1;
|
---|
| 4554 | }
|
---|
| 4555 |
|
---|
| 4556 | static int
|
---|
| 4557 | is_set_opt_anc_info(OptAncInfo* to, int anc)
|
---|
| 4558 | {
|
---|
| 4559 | if ((to->left_anchor & anc) != 0) return 1;
|
---|
| 4560 |
|
---|
| 4561 | return ((to->right_anchor & anc) != 0 ? 1 : 0);
|
---|
| 4562 | }
|
---|
| 4563 |
|
---|
| 4564 | static void
|
---|
| 4565 | add_opt_anc_info(OptAncInfo* to, int anc)
|
---|
| 4566 | {
|
---|
| 4567 | if (is_left_anchor(anc))
|
---|
| 4568 | to->left_anchor |= anc;
|
---|
| 4569 | else
|
---|
| 4570 | to->right_anchor |= anc;
|
---|
| 4571 | }
|
---|
| 4572 |
|
---|
| 4573 | static void
|
---|
| 4574 | remove_opt_anc_info(OptAncInfo* to, int anc)
|
---|
| 4575 | {
|
---|
| 4576 | if (is_left_anchor(anc))
|
---|
| 4577 | to->left_anchor &= ~anc;
|
---|
| 4578 | else
|
---|
| 4579 | to->right_anchor &= ~anc;
|
---|
| 4580 | }
|
---|
| 4581 |
|
---|
| 4582 | static void
|
---|
| 4583 | alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
|
---|
| 4584 | {
|
---|
| 4585 | to->left_anchor &= add->left_anchor;
|
---|
| 4586 | to->right_anchor &= add->right_anchor;
|
---|
| 4587 | }
|
---|
| 4588 |
|
---|
| 4589 | static int
|
---|
| 4590 | is_full_opt_exact_info(OptExactInfo* ex)
|
---|
| 4591 | {
|
---|
| 4592 | return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
|
---|
| 4593 | }
|
---|
| 4594 |
|
---|
| 4595 | static void
|
---|
| 4596 | clear_opt_exact_info(OptExactInfo* ex)
|
---|
| 4597 | {
|
---|
| 4598 | clear_mml(&ex->mmd);
|
---|
| 4599 | clear_opt_anc_info(&ex->anc);
|
---|
| 4600 | ex->reach_end = 0;
|
---|
| 4601 | ex->ignore_case = -1; /* unset */
|
---|
| 4602 | ex->len = 0;
|
---|
| 4603 | ex->s[0] = '\0';
|
---|
| 4604 | }
|
---|
| 4605 |
|
---|
| 4606 | static void
|
---|
| 4607 | copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
|
---|
| 4608 | {
|
---|
| 4609 | *to = *from;
|
---|
| 4610 | }
|
---|
| 4611 |
|
---|
| 4612 | static void
|
---|
| 4613 | concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
|
---|
| 4614 | {
|
---|
| 4615 | int i, j, len;
|
---|
| 4616 | UChar *p, *end;
|
---|
| 4617 | OptAncInfo tanc;
|
---|
| 4618 |
|
---|
| 4619 | if (to->ignore_case < 0)
|
---|
| 4620 | to->ignore_case = add->ignore_case;
|
---|
| 4621 | else if (to->ignore_case != add->ignore_case)
|
---|
| 4622 | return ; /* avoid */
|
---|
| 4623 |
|
---|
| 4624 | p = add->s;
|
---|
| 4625 | end = p + add->len;
|
---|
| 4626 | for (i = to->len; p < end; ) {
|
---|
[331] | 4627 | len = enclen(enc, p, end);
|
---|
[279] | 4628 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
| 4629 | for (j = 0; j < len && p < end; j++)
|
---|
| 4630 | to->s[i++] = *p++;
|
---|
| 4631 | }
|
---|
| 4632 |
|
---|
| 4633 | to->len = i;
|
---|
| 4634 | to->reach_end = (p == end ? add->reach_end : 0);
|
---|
| 4635 |
|
---|
| 4636 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
|
---|
| 4637 | if (! to->reach_end) tanc.right_anchor = 0;
|
---|
| 4638 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
| 4639 | }
|
---|
| 4640 |
|
---|
| 4641 | static void
|
---|
| 4642 | concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
|
---|
| 4643 | int raw ARG_UNUSED, OnigEncoding enc)
|
---|
| 4644 | {
|
---|
| 4645 | int i, j, len;
|
---|
| 4646 | UChar *p;
|
---|
| 4647 |
|
---|
| 4648 | for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
|
---|
[331] | 4649 | len = enclen(enc, p, end);
|
---|
[279] | 4650 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
| 4651 | for (j = 0; j < len && p < end; j++)
|
---|
| 4652 | to->s[i++] = *p++;
|
---|
| 4653 | }
|
---|
| 4654 |
|
---|
| 4655 | to->len = i;
|
---|
| 4656 | }
|
---|
| 4657 |
|
---|
| 4658 | static void
|
---|
| 4659 | alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
|
---|
| 4660 | {
|
---|
| 4661 | int i, j, len;
|
---|
| 4662 |
|
---|
| 4663 | if (add->len == 0 || to->len == 0) {
|
---|
| 4664 | clear_opt_exact_info(to);
|
---|
| 4665 | return ;
|
---|
| 4666 | }
|
---|
| 4667 |
|
---|
| 4668 | if (! is_equal_mml(&to->mmd, &add->mmd)) {
|
---|
| 4669 | clear_opt_exact_info(to);
|
---|
| 4670 | return ;
|
---|
| 4671 | }
|
---|
| 4672 |
|
---|
| 4673 | for (i = 0; i < to->len && i < add->len; ) {
|
---|
| 4674 | if (to->s[i] != add->s[i]) break;
|
---|
[331] | 4675 | len = enclen(env->enc, to->s + i, to->s + to->len);
|
---|
[279] | 4676 |
|
---|
| 4677 | for (j = 1; j < len; j++) {
|
---|
| 4678 | if (to->s[i+j] != add->s[i+j]) break;
|
---|
| 4679 | }
|
---|
| 4680 | if (j < len) break;
|
---|
| 4681 | i += len;
|
---|
| 4682 | }
|
---|
| 4683 |
|
---|
| 4684 | if (! add->reach_end || i < add->len || i < to->len) {
|
---|
| 4685 | to->reach_end = 0;
|
---|
| 4686 | }
|
---|
| 4687 | to->len = i;
|
---|
| 4688 | if (to->ignore_case < 0)
|
---|
| 4689 | to->ignore_case = add->ignore_case;
|
---|
| 4690 | else if (add->ignore_case >= 0)
|
---|
| 4691 | to->ignore_case |= add->ignore_case;
|
---|
| 4692 |
|
---|
| 4693 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
| 4694 | if (! to->reach_end) to->anc.right_anchor = 0;
|
---|
| 4695 | }
|
---|
| 4696 |
|
---|
| 4697 | static void
|
---|
| 4698 | select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
|
---|
| 4699 | {
|
---|
| 4700 | int v1, v2;
|
---|
| 4701 |
|
---|
| 4702 | v1 = now->len;
|
---|
| 4703 | v2 = alt->len;
|
---|
| 4704 |
|
---|
| 4705 | if (v2 == 0) {
|
---|
| 4706 | return ;
|
---|
| 4707 | }
|
---|
| 4708 | else if (v1 == 0) {
|
---|
| 4709 | copy_opt_exact_info(now, alt);
|
---|
| 4710 | return ;
|
---|
| 4711 | }
|
---|
| 4712 | else if (v1 <= 2 && v2 <= 2) {
|
---|
| 4713 | /* ByteValTable[x] is big value --> low price */
|
---|
| 4714 | v2 = map_position_value(enc, now->s[0]);
|
---|
| 4715 | v1 = map_position_value(enc, alt->s[0]);
|
---|
| 4716 |
|
---|
| 4717 | if (now->len > 1) v1 += 5;
|
---|
| 4718 | if (alt->len > 1) v2 += 5;
|
---|
| 4719 | }
|
---|
| 4720 |
|
---|
| 4721 | if (now->ignore_case <= 0) v1 *= 2;
|
---|
| 4722 | if (alt->ignore_case <= 0) v2 *= 2;
|
---|
| 4723 |
|
---|
| 4724 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
| 4725 | copy_opt_exact_info(now, alt);
|
---|
| 4726 | }
|
---|
| 4727 |
|
---|
| 4728 | static void
|
---|
| 4729 | clear_opt_map_info(OptMapInfo* map)
|
---|
| 4730 | {
|
---|
| 4731 | static const OptMapInfo clean_info = {
|
---|
| 4732 | {0, 0}, {0, 0}, 0,
|
---|
| 4733 | {
|
---|
| 4734 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4735 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4736 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4737 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4738 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4739 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4740 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4741 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4742 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4743 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4744 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4745 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4746 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4747 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4748 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4749 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
---|
| 4750 | }
|
---|
| 4751 | };
|
---|
| 4752 |
|
---|
| 4753 | xmemcpy(map, &clean_info, sizeof(OptMapInfo));
|
---|
| 4754 | }
|
---|
| 4755 |
|
---|
| 4756 | static void
|
---|
| 4757 | copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
|
---|
| 4758 | {
|
---|
| 4759 | *to = *from;
|
---|
| 4760 | }
|
---|
| 4761 |
|
---|
| 4762 | static void
|
---|
| 4763 | add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
|
---|
| 4764 | {
|
---|
| 4765 | if (map->map[c] == 0) {
|
---|
| 4766 | map->map[c] = 1;
|
---|
| 4767 | map->value += map_position_value(enc, c);
|
---|
| 4768 | }
|
---|
| 4769 | }
|
---|
| 4770 |
|
---|
| 4771 | static int
|
---|
| 4772 | add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
---|
| 4773 | OnigEncoding enc, OnigCaseFoldType case_fold_flag)
|
---|
| 4774 | {
|
---|
| 4775 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 4776 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
---|
| 4777 | int i, n;
|
---|
| 4778 |
|
---|
| 4779 | add_char_opt_map_info(map, p[0], enc);
|
---|
| 4780 |
|
---|
| 4781 | case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
|
---|
| 4782 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
|
---|
| 4783 | if (n < 0) return n;
|
---|
| 4784 |
|
---|
| 4785 | for (i = 0; i < n; i++) {
|
---|
| 4786 | ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
|
---|
| 4787 | add_char_opt_map_info(map, buf[0], enc);
|
---|
| 4788 | }
|
---|
| 4789 |
|
---|
| 4790 | return 0;
|
---|
| 4791 | }
|
---|
| 4792 |
|
---|
| 4793 | static void
|
---|
| 4794 | select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
|
---|
| 4795 | {
|
---|
| 4796 | const int z = 1<<15; /* 32768: something big value */
|
---|
| 4797 |
|
---|
| 4798 | int v1, v2;
|
---|
| 4799 |
|
---|
| 4800 | if (alt->value == 0) return ;
|
---|
| 4801 | if (now->value == 0) {
|
---|
| 4802 | copy_opt_map_info(now, alt);
|
---|
| 4803 | return ;
|
---|
| 4804 | }
|
---|
| 4805 |
|
---|
| 4806 | v1 = z / now->value;
|
---|
| 4807 | v2 = z / alt->value;
|
---|
| 4808 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
| 4809 | copy_opt_map_info(now, alt);
|
---|
| 4810 | }
|
---|
| 4811 |
|
---|
| 4812 | static int
|
---|
| 4813 | comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
|
---|
| 4814 | {
|
---|
| 4815 | #define COMP_EM_BASE 20
|
---|
| 4816 | int ve, vm;
|
---|
| 4817 |
|
---|
| 4818 | if (m->value <= 0) return -1;
|
---|
| 4819 |
|
---|
| 4820 | ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
|
---|
| 4821 | vm = COMP_EM_BASE * 5 * 2 / m->value;
|
---|
| 4822 | return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
|
---|
| 4823 | }
|
---|
| 4824 |
|
---|
| 4825 | static void
|
---|
| 4826 | alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
|
---|
| 4827 | {
|
---|
| 4828 | int i, val;
|
---|
| 4829 |
|
---|
| 4830 | /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
|
---|
| 4831 | if (to->value == 0) return ;
|
---|
| 4832 | if (add->value == 0 || to->mmd.max < add->mmd.min) {
|
---|
| 4833 | clear_opt_map_info(to);
|
---|
| 4834 | return ;
|
---|
| 4835 | }
|
---|
| 4836 |
|
---|
| 4837 | alt_merge_mml(&to->mmd, &add->mmd);
|
---|
| 4838 |
|
---|
| 4839 | val = 0;
|
---|
| 4840 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
| 4841 | if (add->map[i])
|
---|
| 4842 | to->map[i] = 1;
|
---|
| 4843 |
|
---|
| 4844 | if (to->map[i])
|
---|
| 4845 | val += map_position_value(enc, i);
|
---|
| 4846 | }
|
---|
| 4847 | to->value = val;
|
---|
| 4848 |
|
---|
| 4849 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
| 4850 | }
|
---|
| 4851 |
|
---|
| 4852 | static void
|
---|
| 4853 | set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
|
---|
| 4854 | {
|
---|
| 4855 | copy_mml(&(opt->exb.mmd), mmd);
|
---|
| 4856 | copy_mml(&(opt->expr.mmd), mmd);
|
---|
| 4857 | copy_mml(&(opt->map.mmd), mmd);
|
---|
| 4858 | }
|
---|
| 4859 |
|
---|
| 4860 | static void
|
---|
| 4861 | clear_node_opt_info(NodeOptInfo* opt)
|
---|
| 4862 | {
|
---|
| 4863 | clear_mml(&opt->len);
|
---|
| 4864 | clear_opt_anc_info(&opt->anc);
|
---|
| 4865 | clear_opt_exact_info(&opt->exb);
|
---|
| 4866 | clear_opt_exact_info(&opt->exm);
|
---|
| 4867 | clear_opt_exact_info(&opt->expr);
|
---|
| 4868 | clear_opt_map_info(&opt->map);
|
---|
| 4869 | }
|
---|
| 4870 |
|
---|
| 4871 | static void
|
---|
| 4872 | copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
|
---|
| 4873 | {
|
---|
| 4874 | *to = *from;
|
---|
| 4875 | }
|
---|
| 4876 |
|
---|
| 4877 | static void
|
---|
| 4878 | concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
|
---|
| 4879 | {
|
---|
| 4880 | int exb_reach, exm_reach;
|
---|
| 4881 | OptAncInfo tanc;
|
---|
| 4882 |
|
---|
| 4883 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
|
---|
| 4884 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
| 4885 |
|
---|
| 4886 | if (add->exb.len > 0 && to->len.max == 0) {
|
---|
| 4887 | concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
|
---|
| 4888 | to->len.max, add->len.max);
|
---|
| 4889 | copy_opt_anc_info(&add->exb.anc, &tanc);
|
---|
| 4890 | }
|
---|
| 4891 |
|
---|
| 4892 | if (add->map.value > 0 && to->len.max == 0) {
|
---|
| 4893 | if (add->map.mmd.max == 0)
|
---|
| 4894 | add->map.anc.left_anchor |= to->anc.left_anchor;
|
---|
| 4895 | }
|
---|
| 4896 |
|
---|
| 4897 | exb_reach = to->exb.reach_end;
|
---|
| 4898 | exm_reach = to->exm.reach_end;
|
---|
| 4899 |
|
---|
| 4900 | if (add->len.max != 0)
|
---|
| 4901 | to->exb.reach_end = to->exm.reach_end = 0;
|
---|
| 4902 |
|
---|
| 4903 | if (add->exb.len > 0) {
|
---|
| 4904 | if (exb_reach) {
|
---|
| 4905 | concat_opt_exact_info(&to->exb, &add->exb, enc);
|
---|
| 4906 | clear_opt_exact_info(&add->exb);
|
---|
| 4907 | }
|
---|
| 4908 | else if (exm_reach) {
|
---|
| 4909 | concat_opt_exact_info(&to->exm, &add->exb, enc);
|
---|
| 4910 | clear_opt_exact_info(&add->exb);
|
---|
| 4911 | }
|
---|
| 4912 | }
|
---|
| 4913 | select_opt_exact_info(enc, &to->exm, &add->exb);
|
---|
| 4914 | select_opt_exact_info(enc, &to->exm, &add->exm);
|
---|
| 4915 |
|
---|
| 4916 | if (to->expr.len > 0) {
|
---|
| 4917 | if (add->len.max > 0) {
|
---|
| 4918 | if (to->expr.len > (int )add->len.max)
|
---|
| 4919 | to->expr.len = (int )add->len.max;
|
---|
| 4920 |
|
---|
| 4921 | if (to->expr.mmd.max == 0)
|
---|
| 4922 | select_opt_exact_info(enc, &to->exb, &to->expr);
|
---|
| 4923 | else
|
---|
| 4924 | select_opt_exact_info(enc, &to->exm, &to->expr);
|
---|
| 4925 | }
|
---|
| 4926 | }
|
---|
| 4927 | else if (add->expr.len > 0) {
|
---|
| 4928 | copy_opt_exact_info(&to->expr, &add->expr);
|
---|
| 4929 | }
|
---|
| 4930 |
|
---|
| 4931 | select_opt_map_info(&to->map, &add->map);
|
---|
| 4932 |
|
---|
| 4933 | add_mml(&to->len, &add->len);
|
---|
| 4934 | }
|
---|
| 4935 |
|
---|
| 4936 | static void
|
---|
| 4937 | alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
|
---|
| 4938 | {
|
---|
| 4939 | alt_merge_opt_anc_info (&to->anc, &add->anc);
|
---|
| 4940 | alt_merge_opt_exact_info(&to->exb, &add->exb, env);
|
---|
| 4941 | alt_merge_opt_exact_info(&to->exm, &add->exm, env);
|
---|
| 4942 | alt_merge_opt_exact_info(&to->expr, &add->expr, env);
|
---|
| 4943 | alt_merge_opt_map_info(env->enc, &to->map, &add->map);
|
---|
| 4944 |
|
---|
| 4945 | alt_merge_mml(&to->len, &add->len);
|
---|
| 4946 | }
|
---|
| 4947 |
|
---|
| 4948 |
|
---|
| 4949 | #define MAX_NODE_OPT_INFO_REF_COUNT 5
|
---|
| 4950 |
|
---|
| 4951 | static int
|
---|
| 4952 | optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
---|
| 4953 | {
|
---|
| 4954 | int type;
|
---|
| 4955 | int r = 0;
|
---|
| 4956 |
|
---|
| 4957 | clear_node_opt_info(opt);
|
---|
| 4958 | set_bound_node_opt_info(opt, &env->mmd);
|
---|
| 4959 |
|
---|
| 4960 | type = NTYPE(node);
|
---|
| 4961 | switch (type) {
|
---|
| 4962 | case NT_LIST:
|
---|
| 4963 | {
|
---|
| 4964 | OptEnv nenv;
|
---|
| 4965 | NodeOptInfo nopt;
|
---|
| 4966 | Node* nd = node;
|
---|
| 4967 |
|
---|
| 4968 | copy_opt_env(&nenv, env);
|
---|
| 4969 | do {
|
---|
| 4970 | r = optimize_node_left(NCAR(nd), &nopt, &nenv);
|
---|
| 4971 | if (r == 0) {
|
---|
| 4972 | add_mml(&nenv.mmd, &nopt.len);
|
---|
| 4973 | concat_left_node_opt_info(env->enc, opt, &nopt);
|
---|
| 4974 | }
|
---|
| 4975 | } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
| 4976 | }
|
---|
| 4977 | break;
|
---|
| 4978 |
|
---|
| 4979 | case NT_ALT:
|
---|
| 4980 | {
|
---|
| 4981 | NodeOptInfo nopt;
|
---|
| 4982 | Node* nd = node;
|
---|
| 4983 |
|
---|
| 4984 | do {
|
---|
| 4985 | r = optimize_node_left(NCAR(nd), &nopt, env);
|
---|
| 4986 | if (r == 0) {
|
---|
| 4987 | if (nd == node) copy_node_opt_info(opt, &nopt);
|
---|
| 4988 | else alt_merge_node_opt_info(opt, &nopt, env);
|
---|
| 4989 | }
|
---|
| 4990 | } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
| 4991 | }
|
---|
| 4992 | break;
|
---|
| 4993 |
|
---|
| 4994 | case NT_STR:
|
---|
| 4995 | {
|
---|
| 4996 | StrNode* sn = NSTR(node);
|
---|
| 4997 | OnigDistance slen = sn->end - sn->s;
|
---|
| 4998 | int is_raw = NSTRING_IS_RAW(node);
|
---|
| 4999 |
|
---|
| 5000 | if (! NSTRING_IS_AMBIG(node)) {
|
---|
| 5001 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
| 5002 | is_raw, env->enc);
|
---|
| 5003 | opt->exb.ignore_case = 0;
|
---|
| 5004 | if (slen > 0) {
|
---|
| 5005 | add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
|
---|
| 5006 | }
|
---|
[331] | 5007 | set_mml(&opt->len, slen, slen);
|
---|
[279] | 5008 | }
|
---|
| 5009 | else {
|
---|
[331] | 5010 | OnigDistance max;
|
---|
[279] | 5011 |
|
---|
| 5012 | if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
|
---|
[331] | 5013 | int n = onigenc_strlen(env->enc, sn->s, sn->end);
|
---|
| 5014 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
|
---|
[279] | 5015 | }
|
---|
| 5016 | else {
|
---|
| 5017 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
| 5018 | is_raw, env->enc);
|
---|
| 5019 | opt->exb.ignore_case = 1;
|
---|
| 5020 |
|
---|
| 5021 | if (slen > 0) {
|
---|
| 5022 | r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
|
---|
| 5023 | env->enc, env->case_fold_flag);
|
---|
| 5024 | if (r != 0) break;
|
---|
| 5025 | }
|
---|
| 5026 |
|
---|
| 5027 | max = slen;
|
---|
| 5028 | }
|
---|
| 5029 |
|
---|
[331] | 5030 | set_mml(&opt->len, slen, max);
|
---|
[279] | 5031 | }
|
---|
| 5032 |
|
---|
| 5033 | if ((OnigDistance )opt->exb.len == slen)
|
---|
| 5034 | opt->exb.reach_end = 1;
|
---|
| 5035 | }
|
---|
| 5036 | break;
|
---|
| 5037 |
|
---|
| 5038 | case NT_CCLASS:
|
---|
| 5039 | {
|
---|
| 5040 | int i, z;
|
---|
| 5041 | CClassNode* cc = NCCLASS(node);
|
---|
| 5042 |
|
---|
| 5043 | /* no need to check ignore case. (set in setup_tree()) */
|
---|
| 5044 |
|
---|
| 5045 | if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
|
---|
[331] | 5046 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
[279] | 5047 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 5048 |
|
---|
| 5049 | set_mml(&opt->len, min, max);
|
---|
| 5050 | }
|
---|
| 5051 | else {
|
---|
[331] | 5052 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 5053 | z = BITSET_AT(cc->bs, i);
|
---|
| 5054 | if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
|
---|
| 5055 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
| 5056 | }
|
---|
| 5057 | }
|
---|
[279] | 5058 | set_mml(&opt->len, 1, 1);
|
---|
| 5059 | }
|
---|
| 5060 | }
|
---|
| 5061 | break;
|
---|
| 5062 |
|
---|
| 5063 | case NT_CTYPE:
|
---|
| 5064 | {
|
---|
| 5065 | int i, min, max;
|
---|
| 5066 | int maxcode;
|
---|
| 5067 |
|
---|
| 5068 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 5069 |
|
---|
| 5070 | if (max == 1) {
|
---|
[331] | 5071 | min = 1;
|
---|
[279] | 5072 |
|
---|
| 5073 | maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
|
---|
| 5074 | switch (NCTYPE(node)->ctype) {
|
---|
| 5075 | case ONIGENC_CTYPE_WORD:
|
---|
| 5076 | if (NCTYPE(node)->not != 0) {
|
---|
| 5077 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 5078 | if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
|
---|
| 5079 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
| 5080 | }
|
---|
| 5081 | }
|
---|
| 5082 | }
|
---|
| 5083 | else {
|
---|
| 5084 | for (i = 0; i < maxcode; i++) {
|
---|
| 5085 | if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
|
---|
| 5086 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
| 5087 | }
|
---|
| 5088 | }
|
---|
| 5089 | }
|
---|
| 5090 | break;
|
---|
| 5091 | }
|
---|
| 5092 | }
|
---|
| 5093 | else {
|
---|
[331] | 5094 | min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
[279] | 5095 | }
|
---|
| 5096 | set_mml(&opt->len, min, max);
|
---|
| 5097 | }
|
---|
| 5098 | break;
|
---|
| 5099 |
|
---|
| 5100 | case NT_CANY:
|
---|
| 5101 | {
|
---|
| 5102 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
| 5103 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 5104 | set_mml(&opt->len, min, max);
|
---|
| 5105 | }
|
---|
| 5106 | break;
|
---|
| 5107 |
|
---|
| 5108 | case NT_ANCHOR:
|
---|
| 5109 | switch (NANCHOR(node)->type) {
|
---|
| 5110 | case ANCHOR_BEGIN_BUF:
|
---|
| 5111 | case ANCHOR_BEGIN_POSITION:
|
---|
| 5112 | case ANCHOR_BEGIN_LINE:
|
---|
| 5113 | case ANCHOR_END_BUF:
|
---|
| 5114 | case ANCHOR_SEMI_END_BUF:
|
---|
| 5115 | case ANCHOR_END_LINE:
|
---|
| 5116 | case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
|
---|
| 5117 | case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
|
---|
| 5118 | add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
|
---|
| 5119 | break;
|
---|
| 5120 |
|
---|
| 5121 | case ANCHOR_PREC_READ:
|
---|
| 5122 | {
|
---|
| 5123 | NodeOptInfo nopt;
|
---|
| 5124 |
|
---|
| 5125 | r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
|
---|
| 5126 | if (r == 0) {
|
---|
| 5127 | if (nopt.exb.len > 0)
|
---|
| 5128 | copy_opt_exact_info(&opt->expr, &nopt.exb);
|
---|
| 5129 | else if (nopt.exm.len > 0)
|
---|
| 5130 | copy_opt_exact_info(&opt->expr, &nopt.exm);
|
---|
| 5131 |
|
---|
| 5132 | opt->expr.reach_end = 0;
|
---|
| 5133 |
|
---|
| 5134 | if (nopt.map.value > 0)
|
---|
| 5135 | copy_opt_map_info(&opt->map, &nopt.map);
|
---|
| 5136 | }
|
---|
| 5137 | }
|
---|
| 5138 | break;
|
---|
| 5139 |
|
---|
| 5140 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 5141 | break;
|
---|
| 5142 | }
|
---|
| 5143 | break;
|
---|
| 5144 |
|
---|
| 5145 | case NT_BREF:
|
---|
| 5146 | {
|
---|
| 5147 | int i;
|
---|
| 5148 | int* backs;
|
---|
| 5149 | OnigDistance min, max, tmin, tmax;
|
---|
| 5150 | Node** nodes = SCANENV_MEM_NODES(env->scan_env);
|
---|
| 5151 | BRefNode* br = NBREF(node);
|
---|
| 5152 |
|
---|
| 5153 | if (br->state & NST_RECURSION) {
|
---|
| 5154 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
| 5155 | break;
|
---|
| 5156 | }
|
---|
| 5157 | backs = BACKREFS_P(br);
|
---|
| 5158 | r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
|
---|
| 5159 | if (r != 0) break;
|
---|
| 5160 | r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
|
---|
| 5161 | if (r != 0) break;
|
---|
| 5162 | for (i = 1; i < br->back_num; i++) {
|
---|
| 5163 | r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
|
---|
| 5164 | if (r != 0) break;
|
---|
| 5165 | r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
|
---|
| 5166 | if (r != 0) break;
|
---|
| 5167 | if (min > tmin) min = tmin;
|
---|
| 5168 | if (max < tmax) max = tmax;
|
---|
| 5169 | }
|
---|
| 5170 | if (r == 0) set_mml(&opt->len, min, max);
|
---|
| 5171 | }
|
---|
| 5172 | break;
|
---|
| 5173 |
|
---|
| 5174 | #ifdef USE_SUBEXP_CALL
|
---|
| 5175 | case NT_CALL:
|
---|
| 5176 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
| 5177 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
| 5178 | else {
|
---|
| 5179 | OnigOptionType save = env->options;
|
---|
| 5180 | env->options = NENCLOSE(NCALL(node)->target)->option;
|
---|
| 5181 | r = optimize_node_left(NCALL(node)->target, opt, env);
|
---|
| 5182 | env->options = save;
|
---|
| 5183 | }
|
---|
| 5184 | break;
|
---|
| 5185 | #endif
|
---|
| 5186 |
|
---|
| 5187 | case NT_QTFR:
|
---|
| 5188 | {
|
---|
| 5189 | int i;
|
---|
| 5190 | OnigDistance min, max;
|
---|
| 5191 | NodeOptInfo nopt;
|
---|
| 5192 | QtfrNode* qn = NQTFR(node);
|
---|
| 5193 |
|
---|
| 5194 | r = optimize_node_left(qn->target, &nopt, env);
|
---|
| 5195 | if (r) break;
|
---|
| 5196 |
|
---|
| 5197 | if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 5198 | if (env->mmd.max == 0 &&
|
---|
| 5199 | NTYPE(qn->target) == NT_CANY && qn->greedy) {
|
---|
| 5200 | if (IS_MULTILINE(env->options))
|
---|
| 5201 | /* implicit anchor: /.*a/ ==> /\A.*a/ */
|
---|
| 5202 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
|
---|
| 5203 | else
|
---|
| 5204 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
|
---|
| 5205 | }
|
---|
| 5206 | }
|
---|
| 5207 | else {
|
---|
| 5208 | if (qn->lower > 0) {
|
---|
| 5209 | copy_node_opt_info(opt, &nopt);
|
---|
| 5210 | if (nopt.exb.len > 0) {
|
---|
| 5211 | if (nopt.exb.reach_end) {
|
---|
| 5212 | for (i = 2; i <= qn->lower &&
|
---|
[331] | 5213 | ! is_full_opt_exact_info(&opt->exb); i++) {
|
---|
[279] | 5214 | concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
|
---|
| 5215 | }
|
---|
| 5216 | if (i < qn->lower) {
|
---|
| 5217 | opt->exb.reach_end = 0;
|
---|
| 5218 | }
|
---|
| 5219 | }
|
---|
| 5220 | }
|
---|
| 5221 |
|
---|
| 5222 | if (qn->lower != qn->upper) {
|
---|
| 5223 | opt->exb.reach_end = 0;
|
---|
| 5224 | opt->exm.reach_end = 0;
|
---|
| 5225 | }
|
---|
| 5226 | if (qn->lower > 1)
|
---|
| 5227 | opt->exm.reach_end = 0;
|
---|
| 5228 | }
|
---|
| 5229 | }
|
---|
| 5230 |
|
---|
| 5231 | min = distance_multiply(nopt.len.min, qn->lower);
|
---|
| 5232 | if (IS_REPEAT_INFINITE(qn->upper))
|
---|
| 5233 | max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
|
---|
| 5234 | else
|
---|
| 5235 | max = distance_multiply(nopt.len.max, qn->upper);
|
---|
| 5236 |
|
---|
| 5237 | set_mml(&opt->len, min, max);
|
---|
| 5238 | }
|
---|
| 5239 | break;
|
---|
| 5240 |
|
---|
| 5241 | case NT_ENCLOSE:
|
---|
| 5242 | {
|
---|
| 5243 | EncloseNode* en = NENCLOSE(node);
|
---|
| 5244 |
|
---|
| 5245 | switch (en->type) {
|
---|
| 5246 | case ENCLOSE_OPTION:
|
---|
| 5247 | {
|
---|
| 5248 | OnigOptionType save = env->options;
|
---|
| 5249 |
|
---|
| 5250 | env->options = en->option;
|
---|
| 5251 | r = optimize_node_left(en->target, opt, env);
|
---|
| 5252 | env->options = save;
|
---|
| 5253 | }
|
---|
| 5254 | break;
|
---|
| 5255 |
|
---|
| 5256 | case ENCLOSE_MEMORY:
|
---|
| 5257 | #ifdef USE_SUBEXP_CALL
|
---|
| 5258 | en->opt_count++;
|
---|
| 5259 | if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
|
---|
| 5260 | OnigDistance min, max;
|
---|
| 5261 |
|
---|
| 5262 | min = 0;
|
---|
| 5263 | max = ONIG_INFINITE_DISTANCE;
|
---|
| 5264 | if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
|
---|
| 5265 | if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
|
---|
| 5266 | set_mml(&opt->len, min, max);
|
---|
| 5267 | }
|
---|
| 5268 | else
|
---|
| 5269 | #endif
|
---|
| 5270 | {
|
---|
| 5271 | r = optimize_node_left(en->target, opt, env);
|
---|
| 5272 |
|
---|
| 5273 | if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
|
---|
| 5274 | if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
|
---|
| 5275 | remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
|
---|
| 5276 | }
|
---|
| 5277 | }
|
---|
| 5278 | break;
|
---|
| 5279 |
|
---|
| 5280 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 5281 | case ENCLOSE_CONDITION:
|
---|
| 5282 | r = optimize_node_left(en->target, opt, env);
|
---|
| 5283 | break;
|
---|
[331] | 5284 |
|
---|
| 5285 | case ENCLOSE_ABSENT:
|
---|
| 5286 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
| 5287 | break;
|
---|
[279] | 5288 | }
|
---|
| 5289 | }
|
---|
| 5290 | break;
|
---|
| 5291 |
|
---|
| 5292 | default:
|
---|
| 5293 | #ifdef ONIG_DEBUG
|
---|
| 5294 | fprintf(stderr, "optimize_node_left: undefined node type %d\n",
|
---|
| 5295 | NTYPE(node));
|
---|
| 5296 | #endif
|
---|
| 5297 | r = ONIGERR_TYPE_BUG;
|
---|
| 5298 | break;
|
---|
| 5299 | }
|
---|
| 5300 |
|
---|
| 5301 | return r;
|
---|
| 5302 | }
|
---|
| 5303 |
|
---|
| 5304 | static int
|
---|
| 5305 | set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
|
---|
| 5306 | {
|
---|
| 5307 | int r;
|
---|
| 5308 | int allow_reverse;
|
---|
| 5309 |
|
---|
| 5310 | if (e->len == 0) return 0;
|
---|
| 5311 |
|
---|
| 5312 | reg->exact = (UChar* )xmalloc(e->len);
|
---|
| 5313 | CHECK_NULL_RETURN_MEMERR(reg->exact);
|
---|
| 5314 | xmemcpy(reg->exact, e->s, e->len);
|
---|
| 5315 | reg->exact_end = reg->exact + e->len;
|
---|
| 5316 |
|
---|
| 5317 | allow_reverse =
|
---|
| 5318 | ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
|
---|
| 5319 |
|
---|
| 5320 | if (e->ignore_case > 0) {
|
---|
| 5321 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
| 5322 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
| 5323 | reg->map, &(reg->int_map), 1);
|
---|
| 5324 | if (r == 0) {
|
---|
| 5325 | reg->optimize = (allow_reverse != 0
|
---|
| 5326 | ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
|
---|
| 5327 | }
|
---|
| 5328 | else {
|
---|
| 5329 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
| 5330 | }
|
---|
| 5331 | }
|
---|
| 5332 | else {
|
---|
| 5333 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
| 5334 | }
|
---|
| 5335 | }
|
---|
| 5336 | else {
|
---|
| 5337 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
| 5338 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
[331] | 5339 | reg->map, &(reg->int_map), 0);
|
---|
| 5340 | if (r == 0) {
|
---|
| 5341 | reg->optimize = (allow_reverse != 0
|
---|
[279] | 5342 | ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
|
---|
[331] | 5343 | }
|
---|
| 5344 | else {
|
---|
| 5345 | reg->optimize = ONIG_OPTIMIZE_EXACT;
|
---|
| 5346 | }
|
---|
[279] | 5347 | }
|
---|
| 5348 | else {
|
---|
| 5349 | reg->optimize = ONIG_OPTIMIZE_EXACT;
|
---|
| 5350 | }
|
---|
| 5351 | }
|
---|
| 5352 |
|
---|
| 5353 | reg->dmin = e->mmd.min;
|
---|
| 5354 | reg->dmax = e->mmd.max;
|
---|
| 5355 |
|
---|
| 5356 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
| 5357 | reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
|
---|
| 5358 | }
|
---|
| 5359 |
|
---|
| 5360 | return 0;
|
---|
| 5361 | }
|
---|
| 5362 |
|
---|
| 5363 | static void
|
---|
| 5364 | set_optimize_map_info(regex_t* reg, OptMapInfo* m)
|
---|
| 5365 | {
|
---|
| 5366 | int i;
|
---|
| 5367 |
|
---|
| 5368 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
| 5369 | reg->map[i] = m->map[i];
|
---|
| 5370 |
|
---|
| 5371 | reg->optimize = ONIG_OPTIMIZE_MAP;
|
---|
| 5372 | reg->dmin = m->mmd.min;
|
---|
| 5373 | reg->dmax = m->mmd.max;
|
---|
| 5374 |
|
---|
| 5375 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
| 5376 | reg->threshold_len = (int )(reg->dmin + 1);
|
---|
| 5377 | }
|
---|
| 5378 | }
|
---|
| 5379 |
|
---|
| 5380 | static void
|
---|
| 5381 | set_sub_anchor(regex_t* reg, OptAncInfo* anc)
|
---|
| 5382 | {
|
---|
| 5383 | reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
|
---|
| 5384 | reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
|
---|
| 5385 | }
|
---|
| 5386 |
|
---|
| 5387 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
| 5388 | static void print_optimize_info(FILE* f, regex_t* reg);
|
---|
| 5389 | #endif
|
---|
| 5390 |
|
---|
| 5391 | static int
|
---|
| 5392 | set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
|
---|
| 5393 | {
|
---|
| 5394 |
|
---|
| 5395 | int r;
|
---|
| 5396 | NodeOptInfo opt;
|
---|
| 5397 | OptEnv env;
|
---|
| 5398 |
|
---|
| 5399 | env.enc = reg->enc;
|
---|
| 5400 | env.options = reg->options;
|
---|
| 5401 | env.case_fold_flag = reg->case_fold_flag;
|
---|
| 5402 | env.scan_env = scan_env;
|
---|
| 5403 | clear_mml(&env.mmd);
|
---|
| 5404 |
|
---|
| 5405 | r = optimize_node_left(node, &opt, &env);
|
---|
| 5406 | if (r) return r;
|
---|
| 5407 |
|
---|
| 5408 | reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
|
---|
| 5409 | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
|
---|
| 5410 | ANCHOR_LOOK_BEHIND);
|
---|
| 5411 |
|
---|
[331] | 5412 | if ((opt.anc.left_anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) != 0)
|
---|
| 5413 | reg->anchor &= ~ANCHOR_ANYCHAR_STAR_ML;
|
---|
| 5414 |
|
---|
[279] | 5415 | reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
|
---|
| 5416 | ANCHOR_PREC_READ_NOT);
|
---|
| 5417 |
|
---|
| 5418 | if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
|
---|
| 5419 | reg->anchor_dmin = opt.len.min;
|
---|
| 5420 | reg->anchor_dmax = opt.len.max;
|
---|
| 5421 | }
|
---|
| 5422 |
|
---|
| 5423 | if (opt.exb.len > 0 || opt.exm.len > 0) {
|
---|
| 5424 | select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
|
---|
| 5425 | if (opt.map.value > 0 &&
|
---|
| 5426 | comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
|
---|
| 5427 | goto set_map;
|
---|
| 5428 | }
|
---|
| 5429 | else {
|
---|
| 5430 | r = set_optimize_exact_info(reg, &opt.exb);
|
---|
| 5431 | set_sub_anchor(reg, &opt.exb.anc);
|
---|
| 5432 | }
|
---|
| 5433 | }
|
---|
| 5434 | else if (opt.map.value > 0) {
|
---|
| 5435 | set_map:
|
---|
| 5436 | set_optimize_map_info(reg, &opt.map);
|
---|
| 5437 | set_sub_anchor(reg, &opt.map.anc);
|
---|
| 5438 | }
|
---|
| 5439 | else {
|
---|
| 5440 | reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
|
---|
| 5441 | if (opt.len.max == 0)
|
---|
| 5442 | reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
|
---|
| 5443 | }
|
---|
| 5444 |
|
---|
| 5445 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
| 5446 | print_optimize_info(stderr, reg);
|
---|
| 5447 | #endif
|
---|
| 5448 | return r;
|
---|
| 5449 | }
|
---|
| 5450 |
|
---|
| 5451 | static void
|
---|
| 5452 | clear_optimize_info(regex_t* reg)
|
---|
| 5453 | {
|
---|
| 5454 | reg->optimize = ONIG_OPTIMIZE_NONE;
|
---|
| 5455 | reg->anchor = 0;
|
---|
| 5456 | reg->anchor_dmin = 0;
|
---|
| 5457 | reg->anchor_dmax = 0;
|
---|
| 5458 | reg->sub_anchor = 0;
|
---|
| 5459 | reg->exact_end = (UChar* )NULL;
|
---|
| 5460 | reg->threshold_len = 0;
|
---|
| 5461 | if (IS_NOT_NULL(reg->exact)) {
|
---|
| 5462 | xfree(reg->exact);
|
---|
| 5463 | reg->exact = (UChar* )NULL;
|
---|
| 5464 | }
|
---|
| 5465 | }
|
---|
| 5466 |
|
---|
| 5467 | #ifdef ONIG_DEBUG
|
---|
| 5468 |
|
---|
| 5469 | static void print_enc_string(FILE* fp, OnigEncoding enc,
|
---|
| 5470 | const UChar *s, const UChar *end)
|
---|
| 5471 | {
|
---|
| 5472 | fprintf(fp, "\nPATTERN: /");
|
---|
| 5473 |
|
---|
| 5474 | if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
---|
| 5475 | const UChar *p;
|
---|
| 5476 | OnigCodePoint code;
|
---|
| 5477 |
|
---|
| 5478 | p = s;
|
---|
| 5479 | while (p < end) {
|
---|
| 5480 | code = ONIGENC_MBC_TO_CODE(enc, p, end);
|
---|
| 5481 | if (code >= 0x80) {
|
---|
| 5482 | fprintf(fp, " 0x%04x ", (int )code);
|
---|
| 5483 | }
|
---|
| 5484 | else {
|
---|
| 5485 | fputc((int )code, fp);
|
---|
| 5486 | }
|
---|
| 5487 |
|
---|
[331] | 5488 | p += enclen(enc, p, end);
|
---|
[279] | 5489 | }
|
---|
| 5490 | }
|
---|
| 5491 | else {
|
---|
| 5492 | while (s < end) {
|
---|
| 5493 | fputc((int )*s, fp);
|
---|
| 5494 | s++;
|
---|
| 5495 | }
|
---|
| 5496 | }
|
---|
| 5497 |
|
---|
| 5498 | fprintf(fp, "/ (%s)\n", enc->name);
|
---|
| 5499 | }
|
---|
| 5500 | #endif /* ONIG_DEBUG */
|
---|
| 5501 |
|
---|
| 5502 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
| 5503 | static void
|
---|
| 5504 | print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
|
---|
| 5505 | {
|
---|
| 5506 | if (a == ONIG_INFINITE_DISTANCE)
|
---|
| 5507 | fputs("inf", f);
|
---|
| 5508 | else
|
---|
| 5509 | fprintf(f, "(%"PRIuPTR")", a);
|
---|
| 5510 |
|
---|
| 5511 | fputs("-", f);
|
---|
| 5512 |
|
---|
| 5513 | if (b == ONIG_INFINITE_DISTANCE)
|
---|
| 5514 | fputs("inf", f);
|
---|
| 5515 | else
|
---|
| 5516 | fprintf(f, "(%"PRIuPTR")", b);
|
---|
| 5517 | }
|
---|
| 5518 |
|
---|
| 5519 | static void
|
---|
| 5520 | print_anchor(FILE* f, int anchor)
|
---|
| 5521 | {
|
---|
| 5522 | int q = 0;
|
---|
| 5523 |
|
---|
| 5524 | fprintf(f, "[");
|
---|
| 5525 |
|
---|
| 5526 | if (anchor & ANCHOR_BEGIN_BUF) {
|
---|
| 5527 | fprintf(f, "begin-buf");
|
---|
| 5528 | q = 1;
|
---|
| 5529 | }
|
---|
| 5530 | if (anchor & ANCHOR_BEGIN_LINE) {
|
---|
| 5531 | if (q) fprintf(f, ", ");
|
---|
| 5532 | q = 1;
|
---|
| 5533 | fprintf(f, "begin-line");
|
---|
| 5534 | }
|
---|
| 5535 | if (anchor & ANCHOR_BEGIN_POSITION) {
|
---|
| 5536 | if (q) fprintf(f, ", ");
|
---|
| 5537 | q = 1;
|
---|
| 5538 | fprintf(f, "begin-pos");
|
---|
| 5539 | }
|
---|
| 5540 | if (anchor & ANCHOR_END_BUF) {
|
---|
| 5541 | if (q) fprintf(f, ", ");
|
---|
| 5542 | q = 1;
|
---|
| 5543 | fprintf(f, "end-buf");
|
---|
| 5544 | }
|
---|
| 5545 | if (anchor & ANCHOR_SEMI_END_BUF) {
|
---|
| 5546 | if (q) fprintf(f, ", ");
|
---|
| 5547 | q = 1;
|
---|
| 5548 | fprintf(f, "semi-end-buf");
|
---|
| 5549 | }
|
---|
| 5550 | if (anchor & ANCHOR_END_LINE) {
|
---|
| 5551 | if (q) fprintf(f, ", ");
|
---|
| 5552 | q = 1;
|
---|
| 5553 | fprintf(f, "end-line");
|
---|
| 5554 | }
|
---|
| 5555 | if (anchor & ANCHOR_ANYCHAR_STAR) {
|
---|
| 5556 | if (q) fprintf(f, ", ");
|
---|
| 5557 | q = 1;
|
---|
| 5558 | fprintf(f, "anychar-star");
|
---|
| 5559 | }
|
---|
| 5560 | if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
|
---|
| 5561 | if (q) fprintf(f, ", ");
|
---|
| 5562 | fprintf(f, "anychar-star-ml");
|
---|
| 5563 | }
|
---|
| 5564 |
|
---|
| 5565 | fprintf(f, "]");
|
---|
| 5566 | }
|
---|
| 5567 |
|
---|
| 5568 | static void
|
---|
| 5569 | print_optimize_info(FILE* f, regex_t* reg)
|
---|
| 5570 | {
|
---|
| 5571 | static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
|
---|
| 5572 | "EXACT_IC", "MAP",
|
---|
| 5573 | "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
|
---|
| 5574 |
|
---|
| 5575 | fprintf(f, "optimize: %s\n", on[reg->optimize]);
|
---|
| 5576 | fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
|
---|
| 5577 | if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
|
---|
| 5578 | print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
|
---|
| 5579 | fprintf(f, "\n");
|
---|
| 5580 |
|
---|
| 5581 | if (reg->optimize) {
|
---|
| 5582 | fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
|
---|
| 5583 | fprintf(f, "\n");
|
---|
| 5584 | }
|
---|
| 5585 | fprintf(f, "\n");
|
---|
| 5586 |
|
---|
| 5587 | if (reg->exact) {
|
---|
| 5588 | UChar *p;
|
---|
| 5589 | fprintf(f, "exact: [");
|
---|
| 5590 | for (p = reg->exact; p < reg->exact_end; p++) {
|
---|
| 5591 | fputc(*p, f);
|
---|
| 5592 | }
|
---|
| 5593 | fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
|
---|
| 5594 | }
|
---|
| 5595 | else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
|
---|
| 5596 | int c, i, n = 0;
|
---|
| 5597 |
|
---|
| 5598 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
| 5599 | if (reg->map[i]) n++;
|
---|
| 5600 |
|
---|
| 5601 | fprintf(f, "map: n=%d\n", n);
|
---|
| 5602 | if (n > 0) {
|
---|
| 5603 | c = 0;
|
---|
| 5604 | fputc('[', f);
|
---|
| 5605 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
| 5606 | if (reg->map[i] != 0) {
|
---|
[331] | 5607 | if (c > 0) fputs(", ", f);
|
---|
| 5608 | c++;
|
---|
| 5609 | if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
|
---|
| 5610 | ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
|
---|
| 5611 | fputc(i, f);
|
---|
| 5612 | else
|
---|
| 5613 | fprintf(f, "%d", i);
|
---|
| 5614 | }
|
---|
[279] | 5615 | }
|
---|
| 5616 | fprintf(f, "]\n");
|
---|
| 5617 | }
|
---|
| 5618 | }
|
---|
| 5619 | }
|
---|
| 5620 | #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
|
---|
| 5621 |
|
---|
| 5622 |
|
---|
| 5623 | extern void
|
---|
| 5624 | onig_free_body(regex_t* reg)
|
---|
| 5625 | {
|
---|
| 5626 | if (IS_NOT_NULL(reg)) {
|
---|
| 5627 | if (IS_NOT_NULL(reg->p)) xfree(reg->p);
|
---|
| 5628 | if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
|
---|
| 5629 | if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
|
---|
| 5630 | if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
|
---|
| 5631 | if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
|
---|
| 5632 | if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
|
---|
| 5633 |
|
---|
| 5634 | #ifdef USE_NAMED_GROUP
|
---|
| 5635 | onig_names_free(reg);
|
---|
| 5636 | #endif
|
---|
| 5637 | }
|
---|
| 5638 | }
|
---|
| 5639 |
|
---|
| 5640 | extern void
|
---|
| 5641 | onig_free(regex_t* reg)
|
---|
| 5642 | {
|
---|
| 5643 | if (IS_NOT_NULL(reg)) {
|
---|
| 5644 | onig_free_body(reg);
|
---|
| 5645 | xfree(reg);
|
---|
| 5646 | }
|
---|
| 5647 | }
|
---|
| 5648 |
|
---|
[331] | 5649 | #ifdef RUBY
|
---|
[279] | 5650 | size_t
|
---|
| 5651 | onig_memsize(const regex_t *reg)
|
---|
| 5652 | {
|
---|
| 5653 | size_t size = sizeof(regex_t);
|
---|
| 5654 | if (IS_NULL(reg)) return 0;
|
---|
| 5655 | if (IS_NOT_NULL(reg->p)) size += reg->alloc;
|
---|
| 5656 | if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
|
---|
| 5657 | if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
| 5658 | if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
| 5659 | if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
|
---|
| 5660 | if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
|
---|
| 5661 |
|
---|
| 5662 | return size;
|
---|
| 5663 | }
|
---|
| 5664 |
|
---|
| 5665 | size_t
|
---|
| 5666 | onig_region_memsize(const OnigRegion *regs)
|
---|
| 5667 | {
|
---|
| 5668 | size_t size = sizeof(*regs);
|
---|
| 5669 | if (IS_NULL(regs)) return 0;
|
---|
| 5670 | size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
|
---|
| 5671 | return size;
|
---|
| 5672 | }
|
---|
[331] | 5673 | #endif
|
---|
[279] | 5674 |
|
---|
| 5675 | #define REGEX_TRANSFER(to,from) do {\
|
---|
| 5676 | onig_free_body(to);\
|
---|
| 5677 | xmemcpy(to, from, sizeof(regex_t));\
|
---|
| 5678 | xfree(from);\
|
---|
| 5679 | } while (0)
|
---|
| 5680 |
|
---|
[331] | 5681 | #if 0
|
---|
[279] | 5682 | extern void
|
---|
| 5683 | onig_transfer(regex_t* to, regex_t* from)
|
---|
| 5684 | {
|
---|
| 5685 | REGEX_TRANSFER(to, from);
|
---|
| 5686 | }
|
---|
[331] | 5687 | #endif
|
---|
[279] | 5688 |
|
---|
| 5689 | #ifdef ONIG_DEBUG_COMPILE
|
---|
[331] | 5690 | static void print_compiled_byte_code_list(FILE* f, regex_t* reg);
|
---|
[279] | 5691 | #endif
|
---|
| 5692 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
[331] | 5693 | static void print_tree(FILE* f, Node* node);
|
---|
[279] | 5694 | #endif
|
---|
| 5695 |
|
---|
[331] | 5696 | #ifdef RUBY
|
---|
[279] | 5697 | extern int
|
---|
| 5698 | onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
[331] | 5699 | OnigErrorInfo* einfo)
|
---|
[279] | 5700 | {
|
---|
[331] | 5701 | return onig_compile_ruby(reg, pattern, pattern_end, einfo, NULL, 0);
|
---|
| 5702 | }
|
---|
| 5703 | #endif
|
---|
| 5704 |
|
---|
| 5705 | #ifdef RUBY
|
---|
| 5706 | extern int
|
---|
| 5707 | onig_compile_ruby(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
| 5708 | OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
|
---|
| 5709 | #else
|
---|
| 5710 | extern int
|
---|
| 5711 | onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
| 5712 | OnigErrorInfo* einfo)
|
---|
| 5713 | #endif
|
---|
| 5714 | {
|
---|
[279] | 5715 | #define COMPILE_INIT_SIZE 20
|
---|
| 5716 |
|
---|
| 5717 | int r;
|
---|
| 5718 | OnigDistance init_size;
|
---|
| 5719 | Node* root;
|
---|
| 5720 | ScanEnv scan_env = {0};
|
---|
| 5721 | #ifdef USE_SUBEXP_CALL
|
---|
| 5722 | UnsetAddrList uslist;
|
---|
| 5723 | #endif
|
---|
| 5724 |
|
---|
| 5725 | if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
|
---|
| 5726 |
|
---|
[331] | 5727 | #ifdef RUBY
|
---|
| 5728 | scan_env.sourcefile = sourcefile;
|
---|
| 5729 | scan_env.sourceline = sourceline;
|
---|
| 5730 | #endif
|
---|
[279] | 5731 |
|
---|
| 5732 | #ifdef ONIG_DEBUG
|
---|
| 5733 | print_enc_string(stderr, reg->enc, pattern, pattern_end);
|
---|
| 5734 | #endif
|
---|
| 5735 |
|
---|
| 5736 | if (reg->alloc == 0) {
|
---|
| 5737 | init_size = (pattern_end - pattern) * 2;
|
---|
| 5738 | if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
|
---|
| 5739 | r = BBUF_INIT(reg, init_size);
|
---|
| 5740 | if (r != 0) goto end;
|
---|
| 5741 | }
|
---|
| 5742 | else
|
---|
| 5743 | reg->used = 0;
|
---|
| 5744 |
|
---|
| 5745 | reg->num_mem = 0;
|
---|
| 5746 | reg->num_repeat = 0;
|
---|
| 5747 | reg->num_null_check = 0;
|
---|
| 5748 | reg->repeat_range_alloc = 0;
|
---|
| 5749 | reg->repeat_range = (OnigRepeatRange* )NULL;
|
---|
| 5750 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 5751 | reg->num_comb_exp_check = 0;
|
---|
| 5752 | #endif
|
---|
| 5753 |
|
---|
| 5754 | r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
|
---|
| 5755 | if (r != 0) goto err;
|
---|
| 5756 |
|
---|
[331] | 5757 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
| 5758 | # if 0
|
---|
| 5759 | fprintf(stderr, "ORIGINAL PARSE TREE:\n");
|
---|
| 5760 | print_tree(stderr, root);
|
---|
| 5761 | # endif
|
---|
| 5762 | #endif
|
---|
| 5763 |
|
---|
[279] | 5764 | #ifdef USE_NAMED_GROUP
|
---|
| 5765 | /* mixed use named group and no-named group */
|
---|
| 5766 | if (scan_env.num_named > 0 &&
|
---|
| 5767 | IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
| 5768 | !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
| 5769 | if (scan_env.num_named != scan_env.num_mem)
|
---|
| 5770 | r = disable_noname_group_capture(&root, reg, &scan_env);
|
---|
| 5771 | else
|
---|
| 5772 | r = numbered_ref_check(root);
|
---|
| 5773 |
|
---|
| 5774 | if (r != 0) goto err;
|
---|
| 5775 | }
|
---|
| 5776 | #endif
|
---|
| 5777 |
|
---|
| 5778 | #ifdef USE_SUBEXP_CALL
|
---|
| 5779 | if (scan_env.num_call > 0) {
|
---|
| 5780 | r = unset_addr_list_init(&uslist, scan_env.num_call);
|
---|
| 5781 | if (r != 0) goto err;
|
---|
| 5782 | scan_env.unset_addr_list = &uslist;
|
---|
| 5783 | r = setup_subexp_call(root, &scan_env);
|
---|
| 5784 | if (r != 0) goto err_unset;
|
---|
| 5785 | r = subexp_recursive_check_trav(root, &scan_env);
|
---|
| 5786 | if (r < 0) goto err_unset;
|
---|
| 5787 | r = subexp_inf_recursive_check_trav(root, &scan_env);
|
---|
| 5788 | if (r != 0) goto err_unset;
|
---|
| 5789 |
|
---|
| 5790 | reg->num_call = scan_env.num_call;
|
---|
| 5791 | }
|
---|
| 5792 | else
|
---|
| 5793 | reg->num_call = 0;
|
---|
| 5794 | #endif
|
---|
| 5795 |
|
---|
[331] | 5796 | r = setup_tree(root, reg, 0, &scan_env);
|
---|
[279] | 5797 | if (r != 0) goto err_unset;
|
---|
| 5798 |
|
---|
| 5799 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
| 5800 | print_tree(stderr, root);
|
---|
| 5801 | #endif
|
---|
| 5802 |
|
---|
| 5803 | reg->capture_history = scan_env.capture_history;
|
---|
| 5804 | reg->bt_mem_start = scan_env.bt_mem_start;
|
---|
| 5805 | reg->bt_mem_start |= reg->capture_history;
|
---|
| 5806 | if (IS_FIND_CONDITION(reg->options))
|
---|
| 5807 | BIT_STATUS_ON_ALL(reg->bt_mem_end);
|
---|
| 5808 | else {
|
---|
| 5809 | reg->bt_mem_end = scan_env.bt_mem_end;
|
---|
| 5810 | reg->bt_mem_end |= reg->capture_history;
|
---|
| 5811 | }
|
---|
| 5812 |
|
---|
| 5813 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 5814 | if (scan_env.backrefed_mem == 0
|
---|
[331] | 5815 | # ifdef USE_SUBEXP_CALL
|
---|
[279] | 5816 | || scan_env.num_call == 0
|
---|
[331] | 5817 | # endif
|
---|
[279] | 5818 | ) {
|
---|
| 5819 | setup_comb_exp_check(root, 0, &scan_env);
|
---|
[331] | 5820 | # ifdef USE_SUBEXP_CALL
|
---|
[279] | 5821 | if (scan_env.has_recursion != 0) {
|
---|
| 5822 | scan_env.num_comb_exp_check = 0;
|
---|
| 5823 | }
|
---|
| 5824 | else
|
---|
[331] | 5825 | # endif
|
---|
[279] | 5826 | if (scan_env.comb_exp_max_regnum > 0) {
|
---|
| 5827 | int i;
|
---|
| 5828 | for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
|
---|
| 5829 | if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
|
---|
| 5830 | scan_env.num_comb_exp_check = 0;
|
---|
| 5831 | break;
|
---|
| 5832 | }
|
---|
| 5833 | }
|
---|
| 5834 | }
|
---|
| 5835 | }
|
---|
| 5836 |
|
---|
| 5837 | reg->num_comb_exp_check = scan_env.num_comb_exp_check;
|
---|
| 5838 | #endif
|
---|
| 5839 |
|
---|
| 5840 | clear_optimize_info(reg);
|
---|
| 5841 | #ifndef ONIG_DONT_OPTIMIZE
|
---|
| 5842 | r = set_optimize_info_from_tree(root, reg, &scan_env);
|
---|
| 5843 | if (r != 0) goto err_unset;
|
---|
| 5844 | #endif
|
---|
| 5845 |
|
---|
| 5846 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
|
---|
| 5847 | xfree(scan_env.mem_nodes_dynamic);
|
---|
| 5848 | scan_env.mem_nodes_dynamic = (Node** )NULL;
|
---|
| 5849 | }
|
---|
| 5850 |
|
---|
| 5851 | r = compile_tree(root, reg);
|
---|
| 5852 | if (r == 0) {
|
---|
| 5853 | r = add_opcode(reg, OP_END);
|
---|
| 5854 | #ifdef USE_SUBEXP_CALL
|
---|
| 5855 | if (scan_env.num_call > 0) {
|
---|
| 5856 | r = unset_addr_list_fix(&uslist, reg);
|
---|
| 5857 | unset_addr_list_end(&uslist);
|
---|
| 5858 | if (r) goto err;
|
---|
| 5859 | }
|
---|
| 5860 | #endif
|
---|
| 5861 |
|
---|
| 5862 | if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
|
---|
| 5863 | reg->stack_pop_level = STACK_POP_LEVEL_ALL;
|
---|
| 5864 | else {
|
---|
| 5865 | if (reg->bt_mem_start != 0)
|
---|
| 5866 | reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
|
---|
| 5867 | else
|
---|
| 5868 | reg->stack_pop_level = STACK_POP_LEVEL_FREE;
|
---|
| 5869 | }
|
---|
| 5870 | }
|
---|
| 5871 | #ifdef USE_SUBEXP_CALL
|
---|
| 5872 | else if (scan_env.num_call > 0) {
|
---|
| 5873 | unset_addr_list_end(&uslist);
|
---|
| 5874 | }
|
---|
| 5875 | #endif
|
---|
| 5876 | onig_node_free(root);
|
---|
| 5877 |
|
---|
| 5878 | #ifdef ONIG_DEBUG_COMPILE
|
---|
[331] | 5879 | # ifdef USE_NAMED_GROUP
|
---|
[279] | 5880 | onig_print_names(stderr, reg);
|
---|
[331] | 5881 | # endif
|
---|
[279] | 5882 | print_compiled_byte_code_list(stderr, reg);
|
---|
| 5883 | #endif
|
---|
| 5884 |
|
---|
| 5885 | end:
|
---|
| 5886 | return r;
|
---|
| 5887 |
|
---|
| 5888 | err_unset:
|
---|
| 5889 | #ifdef USE_SUBEXP_CALL
|
---|
| 5890 | if (scan_env.num_call > 0) {
|
---|
| 5891 | unset_addr_list_end(&uslist);
|
---|
| 5892 | }
|
---|
| 5893 | #endif
|
---|
| 5894 | err:
|
---|
| 5895 | if (IS_NOT_NULL(scan_env.error)) {
|
---|
| 5896 | if (IS_NOT_NULL(einfo)) {
|
---|
| 5897 | einfo->enc = scan_env.enc;
|
---|
| 5898 | einfo->par = scan_env.error;
|
---|
| 5899 | einfo->par_end = scan_env.error_end;
|
---|
| 5900 | }
|
---|
| 5901 | }
|
---|
| 5902 |
|
---|
| 5903 | onig_node_free(root);
|
---|
| 5904 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
|
---|
| 5905 | xfree(scan_env.mem_nodes_dynamic);
|
---|
| 5906 | return r;
|
---|
| 5907 | }
|
---|
| 5908 |
|
---|
| 5909 | static int onig_inited = 0;
|
---|
| 5910 |
|
---|
| 5911 | extern int
|
---|
| 5912 | onig_reg_init(regex_t* reg, OnigOptionType option,
|
---|
| 5913 | OnigCaseFoldType case_fold_flag,
|
---|
[331] | 5914 | OnigEncoding enc, const OnigSyntaxType* syntax)
|
---|
[279] | 5915 | {
|
---|
| 5916 | if (! onig_inited)
|
---|
| 5917 | onig_init();
|
---|
| 5918 |
|
---|
| 5919 | if (IS_NULL(reg))
|
---|
| 5920 | return ONIGERR_INVALID_ARGUMENT;
|
---|
| 5921 |
|
---|
| 5922 | if (ONIGENC_IS_UNDEF(enc))
|
---|
| 5923 | return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
|
---|
| 5924 |
|
---|
| 5925 | if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
|
---|
| 5926 | == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
| 5927 | return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
|
---|
| 5928 | }
|
---|
| 5929 |
|
---|
| 5930 | if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
|
---|
| 5931 | option |= syntax->options;
|
---|
| 5932 | option &= ~ONIG_OPTION_SINGLELINE;
|
---|
| 5933 | }
|
---|
| 5934 | else
|
---|
| 5935 | option |= syntax->options;
|
---|
| 5936 |
|
---|
| 5937 | (reg)->enc = enc;
|
---|
| 5938 | (reg)->options = option;
|
---|
| 5939 | (reg)->syntax = syntax;
|
---|
| 5940 | (reg)->optimize = 0;
|
---|
| 5941 | (reg)->exact = (UChar* )NULL;
|
---|
| 5942 | (reg)->int_map = (int* )NULL;
|
---|
| 5943 | (reg)->int_map_backward = (int* )NULL;
|
---|
| 5944 | (reg)->chain = (regex_t* )NULL;
|
---|
| 5945 |
|
---|
| 5946 | (reg)->p = (UChar* )NULL;
|
---|
| 5947 | (reg)->alloc = 0;
|
---|
| 5948 | (reg)->used = 0;
|
---|
| 5949 | (reg)->name_table = (void* )NULL;
|
---|
| 5950 |
|
---|
| 5951 | (reg)->case_fold_flag = case_fold_flag;
|
---|
| 5952 | return 0;
|
---|
| 5953 | }
|
---|
| 5954 |
|
---|
| 5955 | extern int
|
---|
| 5956 | onig_new_without_alloc(regex_t* reg, const UChar* pattern,
|
---|
| 5957 | const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
|
---|
[331] | 5958 | const OnigSyntaxType* syntax, OnigErrorInfo* einfo)
|
---|
[279] | 5959 | {
|
---|
| 5960 | int r;
|
---|
| 5961 |
|
---|
| 5962 | r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
| 5963 | if (r) return r;
|
---|
| 5964 |
|
---|
| 5965 | r = onig_compile(reg, pattern, pattern_end, einfo);
|
---|
| 5966 | return r;
|
---|
| 5967 | }
|
---|
| 5968 |
|
---|
| 5969 | extern int
|
---|
| 5970 | onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
---|
[331] | 5971 | OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
|
---|
[279] | 5972 | OnigErrorInfo* einfo)
|
---|
| 5973 | {
|
---|
| 5974 | int r;
|
---|
| 5975 |
|
---|
| 5976 | *reg = (regex_t* )xmalloc(sizeof(regex_t));
|
---|
| 5977 | if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
---|
| 5978 |
|
---|
| 5979 | r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
| 5980 | if (r) goto err;
|
---|
| 5981 |
|
---|
| 5982 | r = onig_compile(*reg, pattern, pattern_end, einfo);
|
---|
| 5983 | if (r) {
|
---|
| 5984 | err:
|
---|
| 5985 | onig_free(*reg);
|
---|
| 5986 | *reg = NULL;
|
---|
| 5987 | }
|
---|
| 5988 | return r;
|
---|
| 5989 | }
|
---|
| 5990 |
|
---|
[331] | 5991 | extern int
|
---|
| 5992 | onig_initialize(OnigEncoding encodings[] ARG_UNUSED, int n ARG_UNUSED)
|
---|
| 5993 | {
|
---|
| 5994 | return onig_init();
|
---|
| 5995 | }
|
---|
[279] | 5996 |
|
---|
| 5997 | extern int
|
---|
| 5998 | onig_init(void)
|
---|
| 5999 | {
|
---|
| 6000 | if (onig_inited != 0)
|
---|
| 6001 | return 0;
|
---|
| 6002 |
|
---|
| 6003 | onig_inited = 1;
|
---|
| 6004 |
|
---|
[331] | 6005 | #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
|
---|
| 6006 | _CrtSetDbgFlag(_CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF);
|
---|
| 6007 | #endif
|
---|
| 6008 |
|
---|
[279] | 6009 | onigenc_init();
|
---|
| 6010 | /* onigenc_set_default_caseconv_table((UChar* )0); */
|
---|
| 6011 |
|
---|
| 6012 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
| 6013 | onig_statistics_init();
|
---|
| 6014 | #endif
|
---|
| 6015 |
|
---|
| 6016 | return 0;
|
---|
| 6017 | }
|
---|
| 6018 |
|
---|
| 6019 |
|
---|
| 6020 | static OnigEndCallListItemType* EndCallTop;
|
---|
| 6021 |
|
---|
| 6022 | extern void onig_add_end_call(void (*func)(void))
|
---|
| 6023 | {
|
---|
| 6024 | OnigEndCallListItemType* item;
|
---|
| 6025 |
|
---|
| 6026 | item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
|
---|
| 6027 | if (item == 0) return ;
|
---|
| 6028 |
|
---|
| 6029 | item->next = EndCallTop;
|
---|
| 6030 | item->func = func;
|
---|
| 6031 |
|
---|
| 6032 | EndCallTop = item;
|
---|
| 6033 | }
|
---|
| 6034 |
|
---|
| 6035 | static void
|
---|
| 6036 | exec_end_call_list(void)
|
---|
| 6037 | {
|
---|
| 6038 | OnigEndCallListItemType* prev;
|
---|
| 6039 | void (*func)(void);
|
---|
| 6040 |
|
---|
| 6041 | while (EndCallTop != 0) {
|
---|
| 6042 | func = EndCallTop->func;
|
---|
| 6043 | (*func)();
|
---|
| 6044 |
|
---|
| 6045 | prev = EndCallTop;
|
---|
| 6046 | EndCallTop = EndCallTop->next;
|
---|
| 6047 | xfree(prev);
|
---|
| 6048 | }
|
---|
| 6049 | }
|
---|
| 6050 |
|
---|
| 6051 | extern int
|
---|
| 6052 | onig_end(void)
|
---|
| 6053 | {
|
---|
| 6054 | exec_end_call_list();
|
---|
| 6055 |
|
---|
| 6056 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
| 6057 | onig_print_statistics(stderr);
|
---|
| 6058 | #endif
|
---|
| 6059 |
|
---|
[331] | 6060 | #if defined(ONIG_DEBUG_MEMLEAK) && defined(_MSC_VER)
|
---|
| 6061 | _CrtDumpMemoryLeaks();
|
---|
[279] | 6062 | #endif
|
---|
| 6063 |
|
---|
| 6064 | onig_inited = 0;
|
---|
| 6065 |
|
---|
| 6066 | return 0;
|
---|
| 6067 | }
|
---|
| 6068 |
|
---|
| 6069 | extern int
|
---|
| 6070 | onig_is_in_code_range(const UChar* p, OnigCodePoint code)
|
---|
| 6071 | {
|
---|
| 6072 | OnigCodePoint n, *data;
|
---|
| 6073 | OnigCodePoint low, high, x;
|
---|
| 6074 |
|
---|
| 6075 | GET_CODE_POINT(n, p);
|
---|
| 6076 | data = (OnigCodePoint* )p;
|
---|
| 6077 | data++;
|
---|
| 6078 |
|
---|
| 6079 | for (low = 0, high = n; low < high; ) {
|
---|
| 6080 | x = (low + high) >> 1;
|
---|
| 6081 | if (code > data[x * 2 + 1])
|
---|
| 6082 | low = x + 1;
|
---|
| 6083 | else
|
---|
| 6084 | high = x;
|
---|
| 6085 | }
|
---|
| 6086 |
|
---|
| 6087 | return ((low < n && code >= data[low * 2]) ? 1 : 0);
|
---|
| 6088 | }
|
---|
| 6089 |
|
---|
| 6090 | extern int
|
---|
| 6091 | onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
|
---|
| 6092 | {
|
---|
| 6093 | int found;
|
---|
| 6094 |
|
---|
| 6095 | if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
|
---|
| 6096 | if (IS_NULL(cc->mbuf)) {
|
---|
| 6097 | found = 0;
|
---|
| 6098 | }
|
---|
| 6099 | else {
|
---|
| 6100 | found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
|
---|
| 6101 | }
|
---|
| 6102 | }
|
---|
| 6103 | else {
|
---|
| 6104 | found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
|
---|
| 6105 | }
|
---|
| 6106 |
|
---|
| 6107 | if (IS_NCCLASS_NOT(cc))
|
---|
| 6108 | return !found;
|
---|
| 6109 | else
|
---|
| 6110 | return found;
|
---|
| 6111 | }
|
---|
| 6112 |
|
---|
| 6113 | extern int
|
---|
| 6114 | onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
|
---|
| 6115 | {
|
---|
| 6116 | int len;
|
---|
| 6117 |
|
---|
| 6118 | if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
---|
| 6119 | len = 2;
|
---|
| 6120 | }
|
---|
| 6121 | else {
|
---|
| 6122 | len = ONIGENC_CODE_TO_MBCLEN(enc, code);
|
---|
| 6123 | }
|
---|
| 6124 | return onig_is_code_in_cc_len(len, code, cc);
|
---|
| 6125 | }
|
---|
| 6126 |
|
---|
| 6127 |
|
---|
| 6128 | #ifdef ONIG_DEBUG
|
---|
| 6129 |
|
---|
| 6130 | /* arguments type */
|
---|
[331] | 6131 | # define ARG_SPECIAL -1
|
---|
| 6132 | # define ARG_NON 0
|
---|
| 6133 | # define ARG_RELADDR 1
|
---|
| 6134 | # define ARG_ABSADDR 2
|
---|
| 6135 | # define ARG_LENGTH 3
|
---|
| 6136 | # define ARG_MEMNUM 4
|
---|
| 6137 | # define ARG_OPTION 5
|
---|
| 6138 | # define ARG_STATE_CHECK 6
|
---|
[279] | 6139 |
|
---|
| 6140 | OnigOpInfoType OnigOpInfo[] = {
|
---|
| 6141 | { OP_FINISH, "finish", ARG_NON },
|
---|
| 6142 | { OP_END, "end", ARG_NON },
|
---|
| 6143 | { OP_EXACT1, "exact1", ARG_SPECIAL },
|
---|
| 6144 | { OP_EXACT2, "exact2", ARG_SPECIAL },
|
---|
| 6145 | { OP_EXACT3, "exact3", ARG_SPECIAL },
|
---|
| 6146 | { OP_EXACT4, "exact4", ARG_SPECIAL },
|
---|
| 6147 | { OP_EXACT5, "exact5", ARG_SPECIAL },
|
---|
| 6148 | { OP_EXACTN, "exactn", ARG_SPECIAL },
|
---|
| 6149 | { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
|
---|
| 6150 | { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
|
---|
| 6151 | { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
|
---|
| 6152 | { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
|
---|
| 6153 | { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
|
---|
| 6154 | { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
|
---|
| 6155 | { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
|
---|
| 6156 | { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
|
---|
| 6157 | { OP_CCLASS, "cclass", ARG_SPECIAL },
|
---|
| 6158 | { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
|
---|
| 6159 | { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
|
---|
| 6160 | { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
|
---|
| 6161 | { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
|
---|
| 6162 | { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
|
---|
| 6163 | { OP_ANYCHAR, "anychar", ARG_NON },
|
---|
| 6164 | { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
|
---|
| 6165 | { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
|
---|
| 6166 | { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
|
---|
| 6167 | { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
|
---|
| 6168 | { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
|
---|
| 6169 | { OP_WORD, "word", ARG_NON },
|
---|
| 6170 | { OP_NOT_WORD, "not-word", ARG_NON },
|
---|
| 6171 | { OP_WORD_BOUND, "word-bound", ARG_NON },
|
---|
| 6172 | { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
|
---|
| 6173 | { OP_WORD_BEGIN, "word-begin", ARG_NON },
|
---|
| 6174 | { OP_WORD_END, "word-end", ARG_NON },
|
---|
| 6175 | { OP_ASCII_WORD, "ascii-word", ARG_NON },
|
---|
| 6176 | { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
|
---|
| 6177 | { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
|
---|
| 6178 | { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
|
---|
| 6179 | { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
|
---|
| 6180 | { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
|
---|
| 6181 | { OP_BEGIN_BUF, "begin-buf", ARG_NON },
|
---|
| 6182 | { OP_END_BUF, "end-buf", ARG_NON },
|
---|
| 6183 | { OP_BEGIN_LINE, "begin-line", ARG_NON },
|
---|
| 6184 | { OP_END_LINE, "end-line", ARG_NON },
|
---|
| 6185 | { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
|
---|
| 6186 | { OP_BEGIN_POSITION, "begin-position", ARG_NON },
|
---|
| 6187 | { OP_BACKREF1, "backref1", ARG_NON },
|
---|
| 6188 | { OP_BACKREF2, "backref2", ARG_NON },
|
---|
| 6189 | { OP_BACKREFN, "backrefn", ARG_MEMNUM },
|
---|
| 6190 | { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
|
---|
| 6191 | { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
|
---|
| 6192 | { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
|
---|
| 6193 | { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
|
---|
| 6194 | { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
|
---|
| 6195 | { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
|
---|
| 6196 | { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
|
---|
| 6197 | { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
|
---|
| 6198 | { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
|
---|
| 6199 | { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
|
---|
| 6200 | { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
|
---|
| 6201 | { OP_SET_OPTION, "set-option", ARG_OPTION },
|
---|
| 6202 | { OP_KEEP, "keep", ARG_NON },
|
---|
| 6203 | { OP_FAIL, "fail", ARG_NON },
|
---|
| 6204 | { OP_JUMP, "jump", ARG_RELADDR },
|
---|
| 6205 | { OP_PUSH, "push", ARG_RELADDR },
|
---|
| 6206 | { OP_POP, "pop", ARG_NON },
|
---|
| 6207 | { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
|
---|
| 6208 | { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
|
---|
| 6209 | { OP_REPEAT, "repeat", ARG_SPECIAL },
|
---|
| 6210 | { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
|
---|
| 6211 | { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
|
---|
| 6212 | { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
|
---|
| 6213 | { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
|
---|
| 6214 | { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
|
---|
| 6215 | { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
|
---|
| 6216 | { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
|
---|
| 6217 | { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
|
---|
| 6218 | { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
|
---|
| 6219 | { OP_PUSH_POS, "push-pos", ARG_NON },
|
---|
| 6220 | { OP_POP_POS, "pop-pos", ARG_NON },
|
---|
| 6221 | { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
|
---|
| 6222 | { OP_FAIL_POS, "fail-pos", ARG_NON },
|
---|
| 6223 | { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
|
---|
| 6224 | { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
|
---|
| 6225 | { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
|
---|
| 6226 | { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
|
---|
| 6227 | { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
|
---|
[331] | 6228 | { OP_PUSH_ABSENT_POS, "push-absent-pos", ARG_NON },
|
---|
| 6229 | { OP_ABSENT, "absent", ARG_RELADDR },
|
---|
| 6230 | { OP_ABSENT_END, "absent-end", ARG_NON },
|
---|
[279] | 6231 | { OP_CALL, "call", ARG_ABSADDR },
|
---|
| 6232 | { OP_RETURN, "return", ARG_NON },
|
---|
| 6233 | { OP_CONDITION, "condition", ARG_SPECIAL },
|
---|
| 6234 | { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
|
---|
| 6235 | { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
|
---|
| 6236 | { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
|
---|
| 6237 | { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
|
---|
| 6238 | { OP_STATE_CHECK_ANYCHAR_ML_STAR,
|
---|
| 6239 | "state-check-anychar-ml*", ARG_STATE_CHECK },
|
---|
| 6240 | { -1, "", ARG_NON }
|
---|
| 6241 | };
|
---|
| 6242 |
|
---|
| 6243 | static const char*
|
---|
| 6244 | op2name(int opcode)
|
---|
| 6245 | {
|
---|
| 6246 | int i;
|
---|
| 6247 |
|
---|
| 6248 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
| 6249 | if (opcode == OnigOpInfo[i].opcode)
|
---|
| 6250 | return OnigOpInfo[i].name;
|
---|
| 6251 | }
|
---|
| 6252 | return "";
|
---|
| 6253 | }
|
---|
| 6254 |
|
---|
| 6255 | static int
|
---|
| 6256 | op2arg_type(int opcode)
|
---|
| 6257 | {
|
---|
| 6258 | int i;
|
---|
| 6259 |
|
---|
| 6260 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
| 6261 | if (opcode == OnigOpInfo[i].opcode)
|
---|
| 6262 | return OnigOpInfo[i].arg_type;
|
---|
| 6263 | }
|
---|
| 6264 | return ARG_SPECIAL;
|
---|
| 6265 | }
|
---|
| 6266 |
|
---|
[331] | 6267 | # ifdef ONIG_DEBUG_PARSE_TREE
|
---|
[279] | 6268 | static void
|
---|
| 6269 | Indent(FILE* f, int indent)
|
---|
| 6270 | {
|
---|
| 6271 | int i;
|
---|
| 6272 | for (i = 0; i < indent; i++) putc(' ', f);
|
---|
| 6273 | }
|
---|
[331] | 6274 | # endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
[279] | 6275 |
|
---|
| 6276 | static void
|
---|
| 6277 | p_string(FILE* f, ptrdiff_t len, UChar* s)
|
---|
| 6278 | {
|
---|
| 6279 | fputs(":", f);
|
---|
| 6280 | while (len-- > 0) { fputc(*s++, f); }
|
---|
| 6281 | }
|
---|
| 6282 |
|
---|
| 6283 | static void
|
---|
| 6284 | p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
|
---|
| 6285 | {
|
---|
| 6286 | int x = len * mb_len;
|
---|
| 6287 |
|
---|
| 6288 | fprintf(f, ":%d:", len);
|
---|
| 6289 | while (x-- > 0) { fputc(*s++, f); }
|
---|
| 6290 | }
|
---|
| 6291 |
|
---|
| 6292 | extern void
|
---|
[331] | 6293 | onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar* bpend, UChar** nextp,
|
---|
[279] | 6294 | OnigEncoding enc)
|
---|
| 6295 | {
|
---|
| 6296 | int i, n, arg_type;
|
---|
| 6297 | RelAddrType addr;
|
---|
| 6298 | LengthType len;
|
---|
| 6299 | MemNumType mem;
|
---|
| 6300 | StateCheckNumType scn;
|
---|
| 6301 | OnigCodePoint code;
|
---|
| 6302 | UChar *q;
|
---|
| 6303 |
|
---|
| 6304 | fprintf(f, "[%s", op2name(*bp));
|
---|
| 6305 | arg_type = op2arg_type(*bp);
|
---|
| 6306 | if (arg_type != ARG_SPECIAL) {
|
---|
| 6307 | bp++;
|
---|
| 6308 | switch (arg_type) {
|
---|
| 6309 | case ARG_NON:
|
---|
| 6310 | break;
|
---|
| 6311 | case ARG_RELADDR:
|
---|
| 6312 | GET_RELADDR_INC(addr, bp);
|
---|
[331] | 6313 | fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
|
---|
[279] | 6314 | break;
|
---|
| 6315 | case ARG_ABSADDR:
|
---|
| 6316 | GET_ABSADDR_INC(addr, bp);
|
---|
| 6317 | fprintf(f, ":(%d)", addr);
|
---|
| 6318 | break;
|
---|
| 6319 | case ARG_LENGTH:
|
---|
| 6320 | GET_LENGTH_INC(len, bp);
|
---|
| 6321 | fprintf(f, ":%d", len);
|
---|
| 6322 | break;
|
---|
| 6323 | case ARG_MEMNUM:
|
---|
| 6324 | mem = *((MemNumType* )bp);
|
---|
| 6325 | bp += SIZE_MEMNUM;
|
---|
| 6326 | fprintf(f, ":%d", mem);
|
---|
| 6327 | break;
|
---|
| 6328 | case ARG_OPTION:
|
---|
| 6329 | {
|
---|
| 6330 | OnigOptionType option = *((OnigOptionType* )bp);
|
---|
| 6331 | bp += SIZE_OPTION;
|
---|
| 6332 | fprintf(f, ":%d", option);
|
---|
| 6333 | }
|
---|
| 6334 | break;
|
---|
| 6335 |
|
---|
| 6336 | case ARG_STATE_CHECK:
|
---|
| 6337 | scn = *((StateCheckNumType* )bp);
|
---|
| 6338 | bp += SIZE_STATE_CHECK_NUM;
|
---|
| 6339 | fprintf(f, ":%d", scn);
|
---|
| 6340 | break;
|
---|
| 6341 | }
|
---|
| 6342 | }
|
---|
| 6343 | else {
|
---|
| 6344 | switch (*bp++) {
|
---|
| 6345 | case OP_EXACT1:
|
---|
| 6346 | case OP_ANYCHAR_STAR_PEEK_NEXT:
|
---|
| 6347 | case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
|
---|
| 6348 | p_string(f, 1, bp++); break;
|
---|
| 6349 | case OP_EXACT2:
|
---|
| 6350 | p_string(f, 2, bp); bp += 2; break;
|
---|
| 6351 | case OP_EXACT3:
|
---|
| 6352 | p_string(f, 3, bp); bp += 3; break;
|
---|
| 6353 | case OP_EXACT4:
|
---|
| 6354 | p_string(f, 4, bp); bp += 4; break;
|
---|
| 6355 | case OP_EXACT5:
|
---|
| 6356 | p_string(f, 5, bp); bp += 5; break;
|
---|
| 6357 | case OP_EXACTN:
|
---|
| 6358 | GET_LENGTH_INC(len, bp);
|
---|
| 6359 | p_len_string(f, len, 1, bp);
|
---|
| 6360 | bp += len;
|
---|
| 6361 | break;
|
---|
| 6362 |
|
---|
| 6363 | case OP_EXACTMB2N1:
|
---|
| 6364 | p_string(f, 2, bp); bp += 2; break;
|
---|
| 6365 | case OP_EXACTMB2N2:
|
---|
| 6366 | p_string(f, 4, bp); bp += 4; break;
|
---|
| 6367 | case OP_EXACTMB2N3:
|
---|
| 6368 | p_string(f, 6, bp); bp += 6; break;
|
---|
| 6369 | case OP_EXACTMB2N:
|
---|
| 6370 | GET_LENGTH_INC(len, bp);
|
---|
| 6371 | p_len_string(f, len, 2, bp);
|
---|
| 6372 | bp += len * 2;
|
---|
| 6373 | break;
|
---|
| 6374 | case OP_EXACTMB3N:
|
---|
| 6375 | GET_LENGTH_INC(len, bp);
|
---|
| 6376 | p_len_string(f, len, 3, bp);
|
---|
| 6377 | bp += len * 3;
|
---|
| 6378 | break;
|
---|
| 6379 | case OP_EXACTMBN:
|
---|
| 6380 | {
|
---|
| 6381 | int mb_len;
|
---|
| 6382 |
|
---|
| 6383 | GET_LENGTH_INC(mb_len, bp);
|
---|
| 6384 | GET_LENGTH_INC(len, bp);
|
---|
| 6385 | fprintf(f, ":%d:%d:", mb_len, len);
|
---|
| 6386 | n = len * mb_len;
|
---|
| 6387 | while (n-- > 0) { fputc(*bp++, f); }
|
---|
| 6388 | }
|
---|
| 6389 | break;
|
---|
| 6390 |
|
---|
| 6391 | case OP_EXACT1_IC:
|
---|
[331] | 6392 | len = enclen(enc, bp, bpend);
|
---|
[279] | 6393 | p_string(f, len, bp);
|
---|
| 6394 | bp += len;
|
---|
| 6395 | break;
|
---|
| 6396 | case OP_EXACTN_IC:
|
---|
| 6397 | GET_LENGTH_INC(len, bp);
|
---|
| 6398 | p_len_string(f, len, 1, bp);
|
---|
| 6399 | bp += len;
|
---|
| 6400 | break;
|
---|
| 6401 |
|
---|
| 6402 | case OP_CCLASS:
|
---|
| 6403 | n = bitset_on_num((BitSetRef )bp);
|
---|
| 6404 | bp += SIZE_BITSET;
|
---|
| 6405 | fprintf(f, ":%d", n);
|
---|
| 6406 | break;
|
---|
| 6407 |
|
---|
| 6408 | case OP_CCLASS_NOT:
|
---|
| 6409 | n = bitset_on_num((BitSetRef )bp);
|
---|
| 6410 | bp += SIZE_BITSET;
|
---|
| 6411 | fprintf(f, ":%d", n);
|
---|
| 6412 | break;
|
---|
| 6413 |
|
---|
| 6414 | case OP_CCLASS_MB:
|
---|
| 6415 | case OP_CCLASS_MB_NOT:
|
---|
| 6416 | GET_LENGTH_INC(len, bp);
|
---|
| 6417 | q = bp;
|
---|
[331] | 6418 | # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
[279] | 6419 | ALIGNMENT_RIGHT(q);
|
---|
[331] | 6420 | # endif
|
---|
[279] | 6421 | GET_CODE_POINT(code, q);
|
---|
| 6422 | bp += len;
|
---|
| 6423 | fprintf(f, ":%d:%d", (int )code, len);
|
---|
| 6424 | break;
|
---|
| 6425 |
|
---|
| 6426 | case OP_CCLASS_MIX:
|
---|
| 6427 | case OP_CCLASS_MIX_NOT:
|
---|
| 6428 | n = bitset_on_num((BitSetRef )bp);
|
---|
| 6429 | bp += SIZE_BITSET;
|
---|
| 6430 | GET_LENGTH_INC(len, bp);
|
---|
| 6431 | q = bp;
|
---|
[331] | 6432 | # ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
[279] | 6433 | ALIGNMENT_RIGHT(q);
|
---|
[331] | 6434 | # endif
|
---|
[279] | 6435 | GET_CODE_POINT(code, q);
|
---|
| 6436 | bp += len;
|
---|
| 6437 | fprintf(f, ":%d:%d:%d", n, (int )code, len);
|
---|
| 6438 | break;
|
---|
| 6439 |
|
---|
| 6440 | case OP_BACKREFN_IC:
|
---|
| 6441 | mem = *((MemNumType* )bp);
|
---|
| 6442 | bp += SIZE_MEMNUM;
|
---|
| 6443 | fprintf(f, ":%d", mem);
|
---|
| 6444 | break;
|
---|
| 6445 |
|
---|
| 6446 | case OP_BACKREF_MULTI_IC:
|
---|
| 6447 | case OP_BACKREF_MULTI:
|
---|
| 6448 | fputs(" ", f);
|
---|
| 6449 | GET_LENGTH_INC(len, bp);
|
---|
| 6450 | for (i = 0; i < len; i++) {
|
---|
| 6451 | GET_MEMNUM_INC(mem, bp);
|
---|
| 6452 | if (i > 0) fputs(", ", f);
|
---|
| 6453 | fprintf(f, "%d", mem);
|
---|
| 6454 | }
|
---|
| 6455 | break;
|
---|
| 6456 |
|
---|
| 6457 | case OP_BACKREF_WITH_LEVEL:
|
---|
| 6458 | {
|
---|
| 6459 | OnigOptionType option;
|
---|
| 6460 | LengthType level;
|
---|
| 6461 |
|
---|
| 6462 | GET_OPTION_INC(option, bp);
|
---|
| 6463 | fprintf(f, ":%d", option);
|
---|
| 6464 | GET_LENGTH_INC(level, bp);
|
---|
| 6465 | fprintf(f, ":%d", level);
|
---|
| 6466 |
|
---|
| 6467 | fputs(" ", f);
|
---|
| 6468 | GET_LENGTH_INC(len, bp);
|
---|
| 6469 | for (i = 0; i < len; i++) {
|
---|
| 6470 | GET_MEMNUM_INC(mem, bp);
|
---|
| 6471 | if (i > 0) fputs(", ", f);
|
---|
| 6472 | fprintf(f, "%d", mem);
|
---|
| 6473 | }
|
---|
| 6474 | }
|
---|
| 6475 | break;
|
---|
| 6476 |
|
---|
| 6477 | case OP_REPEAT:
|
---|
| 6478 | case OP_REPEAT_NG:
|
---|
| 6479 | {
|
---|
| 6480 | mem = *((MemNumType* )bp);
|
---|
| 6481 | bp += SIZE_MEMNUM;
|
---|
| 6482 | addr = *((RelAddrType* )bp);
|
---|
| 6483 | bp += SIZE_RELADDR;
|
---|
| 6484 | fprintf(f, ":%d:%d", mem, addr);
|
---|
| 6485 | }
|
---|
| 6486 | break;
|
---|
| 6487 |
|
---|
| 6488 | case OP_PUSH_OR_JUMP_EXACT1:
|
---|
| 6489 | case OP_PUSH_IF_PEEK_NEXT:
|
---|
| 6490 | addr = *((RelAddrType* )bp);
|
---|
| 6491 | bp += SIZE_RELADDR;
|
---|
[331] | 6492 | fprintf(f, ":(%s%d)", (addr >= 0) ? "+" : "", addr);
|
---|
[279] | 6493 | p_string(f, 1, bp);
|
---|
| 6494 | bp += 1;
|
---|
| 6495 | break;
|
---|
| 6496 |
|
---|
| 6497 | case OP_LOOK_BEHIND:
|
---|
| 6498 | GET_LENGTH_INC(len, bp);
|
---|
| 6499 | fprintf(f, ":%d", len);
|
---|
| 6500 | break;
|
---|
| 6501 |
|
---|
| 6502 | case OP_PUSH_LOOK_BEHIND_NOT:
|
---|
| 6503 | GET_RELADDR_INC(addr, bp);
|
---|
| 6504 | GET_LENGTH_INC(len, bp);
|
---|
[331] | 6505 | fprintf(f, ":%d:(%s%d)", len, (addr >= 0) ? "+" : "", addr);
|
---|
[279] | 6506 | break;
|
---|
| 6507 |
|
---|
| 6508 | case OP_STATE_CHECK_PUSH:
|
---|
| 6509 | case OP_STATE_CHECK_PUSH_OR_JUMP:
|
---|
| 6510 | scn = *((StateCheckNumType* )bp);
|
---|
| 6511 | bp += SIZE_STATE_CHECK_NUM;
|
---|
| 6512 | addr = *((RelAddrType* )bp);
|
---|
| 6513 | bp += SIZE_RELADDR;
|
---|
[331] | 6514 | fprintf(f, ":%d:(%s%d)", scn, (addr >= 0) ? "+" : "", addr);
|
---|
[279] | 6515 | break;
|
---|
| 6516 |
|
---|
| 6517 | case OP_CONDITION:
|
---|
| 6518 | GET_MEMNUM_INC(mem, bp);
|
---|
| 6519 | GET_RELADDR_INC(addr, bp);
|
---|
[331] | 6520 | fprintf(f, ":%d:(%s%d)", mem, (addr >= 0) ? "+" : "", addr);
|
---|
[279] | 6521 | break;
|
---|
| 6522 |
|
---|
| 6523 | default:
|
---|
| 6524 | fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
|
---|
[331] | 6525 | bp[-1]);
|
---|
[279] | 6526 | }
|
---|
| 6527 | }
|
---|
| 6528 | fputs("]", f);
|
---|
| 6529 | if (nextp) *nextp = bp;
|
---|
| 6530 | }
|
---|
| 6531 |
|
---|
[331] | 6532 | # ifdef ONIG_DEBUG_COMPILE
|
---|
[279] | 6533 | static void
|
---|
| 6534 | print_compiled_byte_code_list(FILE* f, regex_t* reg)
|
---|
| 6535 | {
|
---|
| 6536 | int ncode;
|
---|
| 6537 | UChar* bp = reg->p;
|
---|
| 6538 | UChar* end = reg->p + reg->used;
|
---|
| 6539 |
|
---|
| 6540 | fprintf(f, "code length: %d", reg->used);
|
---|
| 6541 |
|
---|
| 6542 | ncode = -1;
|
---|
| 6543 | while (bp < end) {
|
---|
| 6544 | ncode++;
|
---|
| 6545 | if (ncode % 5 == 0)
|
---|
| 6546 | fprintf(f, "\n%ld:", bp - reg->p);
|
---|
| 6547 | else
|
---|
| 6548 | fprintf(f, " %ld:", bp - reg->p);
|
---|
[331] | 6549 | onig_print_compiled_byte_code(f, bp, end, &bp, reg->enc);
|
---|
[279] | 6550 | }
|
---|
| 6551 |
|
---|
| 6552 | fprintf(f, "\n");
|
---|
| 6553 | }
|
---|
[331] | 6554 | # endif /* ONIG_DEBUG_COMPILE */
|
---|
[279] | 6555 |
|
---|
[331] | 6556 | # ifdef ONIG_DEBUG_PARSE_TREE
|
---|
[279] | 6557 | static void
|
---|
| 6558 | print_indent_tree(FILE* f, Node* node, int indent)
|
---|
| 6559 | {
|
---|
| 6560 | int i, type, container_p = 0;
|
---|
| 6561 | int add = 3;
|
---|
| 6562 | UChar* p;
|
---|
| 6563 |
|
---|
| 6564 | Indent(f, indent);
|
---|
| 6565 | if (IS_NULL(node)) {
|
---|
| 6566 | fprintf(f, "ERROR: null node!!!\n");
|
---|
| 6567 | exit (0);
|
---|
| 6568 | }
|
---|
| 6569 |
|
---|
| 6570 | type = NTYPE(node);
|
---|
| 6571 | switch (type) {
|
---|
| 6572 | case NT_LIST:
|
---|
| 6573 | case NT_ALT:
|
---|
| 6574 | if (NTYPE(node) == NT_LIST)
|
---|
| 6575 | fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
|
---|
| 6576 | else
|
---|
| 6577 | fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
|
---|
| 6578 |
|
---|
| 6579 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
| 6580 | while (IS_NOT_NULL(node = NCDR(node))) {
|
---|
| 6581 | if (NTYPE(node) != type) {
|
---|
| 6582 | fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
|
---|
| 6583 | exit(0);
|
---|
| 6584 | }
|
---|
| 6585 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
| 6586 | }
|
---|
| 6587 | break;
|
---|
| 6588 |
|
---|
| 6589 | case NT_STR:
|
---|
| 6590 | fprintf(f, "<string%s:%"PRIxPTR">",
|
---|
| 6591 | (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
|
---|
| 6592 | for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
|
---|
| 6593 | if (*p >= 0x20 && *p < 0x7f)
|
---|
| 6594 | fputc(*p, f);
|
---|
| 6595 | else {
|
---|
| 6596 | fprintf(f, " 0x%02x", *p);
|
---|
| 6597 | }
|
---|
| 6598 | }
|
---|
| 6599 | break;
|
---|
| 6600 |
|
---|
| 6601 | case NT_CCLASS:
|
---|
| 6602 | fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
|
---|
[331] | 6603 | if (IS_NCCLASS_NOT(NCCLASS(node))) fputs("not ", f);
|
---|
[279] | 6604 | if (NCCLASS(node)->mbuf) {
|
---|
| 6605 | BBuf* bbuf = NCCLASS(node)->mbuf;
|
---|
[331] | 6606 | OnigCodePoint* data = (OnigCodePoint* )bbuf->p;
|
---|
| 6607 | OnigCodePoint* end = (OnigCodePoint* )(bbuf->p + bbuf->used);
|
---|
| 6608 | fprintf(f, "%d", *data++);
|
---|
| 6609 | for (; data < end; data+=2) {
|
---|
| 6610 | fprintf(f, ",");
|
---|
| 6611 | fprintf(f, "%04x-%04x", data[0], data[1]);
|
---|
[279] | 6612 | }
|
---|
| 6613 | }
|
---|
| 6614 | break;
|
---|
| 6615 |
|
---|
| 6616 | case NT_CTYPE:
|
---|
| 6617 | fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
|
---|
| 6618 | switch (NCTYPE(node)->ctype) {
|
---|
| 6619 | case ONIGENC_CTYPE_WORD:
|
---|
| 6620 | if (NCTYPE(node)->not != 0)
|
---|
| 6621 | fputs("not word", f);
|
---|
| 6622 | else
|
---|
| 6623 | fputs("word", f);
|
---|
| 6624 | break;
|
---|
| 6625 |
|
---|
| 6626 | default:
|
---|
| 6627 | fprintf(f, "ERROR: undefined ctype.\n");
|
---|
| 6628 | exit(0);
|
---|
| 6629 | }
|
---|
| 6630 | break;
|
---|
| 6631 |
|
---|
| 6632 | case NT_CANY:
|
---|
| 6633 | fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
|
---|
| 6634 | break;
|
---|
| 6635 |
|
---|
| 6636 | case NT_ANCHOR:
|
---|
| 6637 | fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
|
---|
| 6638 | switch (NANCHOR(node)->type) {
|
---|
| 6639 | case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
|
---|
| 6640 | case ANCHOR_END_BUF: fputs("end buf", f); break;
|
---|
| 6641 | case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
|
---|
| 6642 | case ANCHOR_END_LINE: fputs("end line", f); break;
|
---|
| 6643 | case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
|
---|
| 6644 | case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
|
---|
| 6645 |
|
---|
| 6646 | case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
|
---|
| 6647 | case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
|
---|
[331] | 6648 | # ifdef USE_WORD_BEGIN_END
|
---|
[279] | 6649 | case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
|
---|
| 6650 | case ANCHOR_WORD_END: fputs("word end", f); break;
|
---|
[331] | 6651 | # endif
|
---|
[279] | 6652 | case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
|
---|
| 6653 | case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
|
---|
| 6654 | case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
|
---|
| 6655 | case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
|
---|
| 6656 | case ANCHOR_KEEP: fputs("keep",f); break;
|
---|
| 6657 |
|
---|
| 6658 | default:
|
---|
| 6659 | fprintf(f, "ERROR: undefined anchor type.\n");
|
---|
| 6660 | break;
|
---|
| 6661 | }
|
---|
| 6662 | break;
|
---|
| 6663 |
|
---|
| 6664 | case NT_BREF:
|
---|
| 6665 | {
|
---|
| 6666 | int* p;
|
---|
| 6667 | BRefNode* br = NBREF(node);
|
---|
| 6668 | p = BACKREFS_P(br);
|
---|
| 6669 | fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
|
---|
| 6670 | for (i = 0; i < br->back_num; i++) {
|
---|
| 6671 | if (i > 0) fputs(", ", f);
|
---|
| 6672 | fprintf(f, "%d", p[i]);
|
---|
| 6673 | }
|
---|
| 6674 | }
|
---|
| 6675 | break;
|
---|
| 6676 |
|
---|
[331] | 6677 | # ifdef USE_SUBEXP_CALL
|
---|
[279] | 6678 | case NT_CALL:
|
---|
| 6679 | {
|
---|
| 6680 | CallNode* cn = NCALL(node);
|
---|
| 6681 | fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
|
---|
| 6682 | p_string(f, cn->name_end - cn->name, cn->name);
|
---|
| 6683 | }
|
---|
| 6684 | break;
|
---|
[331] | 6685 | # endif
|
---|
[279] | 6686 |
|
---|
| 6687 | case NT_QTFR:
|
---|
| 6688 | fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
|
---|
| 6689 | NQTFR(node)->lower, NQTFR(node)->upper,
|
---|
| 6690 | (NQTFR(node)->greedy ? "" : "?"));
|
---|
| 6691 | print_indent_tree(f, NQTFR(node)->target, indent + add);
|
---|
| 6692 | break;
|
---|
| 6693 |
|
---|
| 6694 | case NT_ENCLOSE:
|
---|
| 6695 | fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
|
---|
| 6696 | switch (NENCLOSE(node)->type) {
|
---|
| 6697 | case ENCLOSE_OPTION:
|
---|
| 6698 | fprintf(f, "option:%d", NENCLOSE(node)->option);
|
---|
| 6699 | break;
|
---|
| 6700 | case ENCLOSE_MEMORY:
|
---|
| 6701 | fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
|
---|
| 6702 | break;
|
---|
| 6703 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 6704 | fprintf(f, "stop-bt");
|
---|
| 6705 | break;
|
---|
| 6706 | case ENCLOSE_CONDITION:
|
---|
| 6707 | fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
|
---|
| 6708 | break;
|
---|
[331] | 6709 | case ENCLOSE_ABSENT:
|
---|
| 6710 | fprintf(f, "absent");
|
---|
| 6711 | break;
|
---|
[279] | 6712 |
|
---|
| 6713 | default:
|
---|
| 6714 | break;
|
---|
| 6715 | }
|
---|
| 6716 | fprintf(f, "\n");
|
---|
| 6717 | print_indent_tree(f, NENCLOSE(node)->target, indent + add);
|
---|
| 6718 | break;
|
---|
| 6719 |
|
---|
| 6720 | default:
|
---|
| 6721 | fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
|
---|
| 6722 | break;
|
---|
| 6723 | }
|
---|
| 6724 |
|
---|
| 6725 | if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
|
---|
| 6726 | type != NT_ENCLOSE)
|
---|
| 6727 | fprintf(f, "\n");
|
---|
| 6728 |
|
---|
| 6729 | if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
|
---|
| 6730 |
|
---|
| 6731 | fflush(f);
|
---|
| 6732 | }
|
---|
| 6733 |
|
---|
| 6734 | static void
|
---|
| 6735 | print_tree(FILE* f, Node* node)
|
---|
| 6736 | {
|
---|
| 6737 | print_indent_tree(f, node, 0);
|
---|
| 6738 | }
|
---|
[331] | 6739 | # endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
[279] | 6740 | #endif /* ONIG_DEBUG */
|
---|