[279] | 1 | /**********************************************************************
|
---|
| 2 | regcomp.c - Onigmo (Oniguruma-mod) (regular expression library)
|
---|
| 3 | **********************************************************************/
|
---|
| 4 | /*-
|
---|
| 5 | * Copyright (c) 2002-2013 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
|
---|
| 6 | * Copyright (c) 2011-2014 K.Takata <kentkt AT csc DOT jp>
|
---|
| 7 | * All rights reserved.
|
---|
| 8 | *
|
---|
| 9 | * Redistribution and use in source and binary forms, with or without
|
---|
| 10 | * modification, are permitted provided that the following conditions
|
---|
| 11 | * are met:
|
---|
| 12 | * 1. Redistributions of source code must retain the above copyright
|
---|
| 13 | * notice, this list of conditions and the following disclaimer.
|
---|
| 14 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
| 15 | * notice, this list of conditions and the following disclaimer in the
|
---|
| 16 | * documentation and/or other materials provided with the distribution.
|
---|
| 17 | *
|
---|
| 18 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
---|
| 19 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
---|
| 20 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
---|
| 21 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
---|
| 22 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
---|
| 23 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
---|
| 24 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
---|
| 25 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
---|
| 26 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
---|
| 27 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
| 28 | * SUCH DAMAGE.
|
---|
| 29 | */
|
---|
| 30 |
|
---|
| 31 | #include "regparse.h"
|
---|
| 32 |
|
---|
| 33 | #if defined(USE_MULTI_THREAD_SYSTEM) \
|
---|
| 34 | && defined(USE_DEFAULT_MULTI_THREAD_SYSTEM)
|
---|
| 35 | #ifdef _WIN32
|
---|
| 36 | CRITICAL_SECTION gOnigMutex;
|
---|
| 37 | #else
|
---|
| 38 | pthread_mutex_t gOnigMutex;
|
---|
| 39 | #endif
|
---|
| 40 | #endif
|
---|
| 41 |
|
---|
| 42 | OnigCaseFoldType OnigDefaultCaseFoldFlag = ONIGENC_CASE_FOLD_MIN;
|
---|
| 43 |
|
---|
| 44 | extern OnigCaseFoldType
|
---|
| 45 | onig_get_default_case_fold_flag(void)
|
---|
| 46 | {
|
---|
| 47 | return OnigDefaultCaseFoldFlag;
|
---|
| 48 | }
|
---|
| 49 |
|
---|
| 50 | extern int
|
---|
| 51 | onig_set_default_case_fold_flag(OnigCaseFoldType case_fold_flag)
|
---|
| 52 | {
|
---|
| 53 | OnigDefaultCaseFoldFlag = case_fold_flag;
|
---|
| 54 | return 0;
|
---|
| 55 | }
|
---|
| 56 |
|
---|
| 57 |
|
---|
| 58 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 59 | static unsigned char PadBuf[WORD_ALIGNMENT_SIZE];
|
---|
| 60 | #endif
|
---|
| 61 |
|
---|
| 62 | #if 0
|
---|
| 63 | static UChar*
|
---|
| 64 | str_dup(UChar* s, UChar* end)
|
---|
| 65 | {
|
---|
| 66 | ptrdiff_t len = end - s;
|
---|
| 67 |
|
---|
| 68 | if (len > 0) {
|
---|
| 69 | UChar* r = (UChar* )xmalloc(len + 1);
|
---|
| 70 | CHECK_NULL_RETURN(r);
|
---|
| 71 | xmemcpy(r, s, len);
|
---|
| 72 | r[len] = (UChar )0;
|
---|
| 73 | return r;
|
---|
| 74 | }
|
---|
| 75 | else return NULL;
|
---|
| 76 | }
|
---|
| 77 | #endif
|
---|
| 78 |
|
---|
| 79 | static void
|
---|
| 80 | swap_node(Node* a, Node* b)
|
---|
| 81 | {
|
---|
| 82 | Node c;
|
---|
| 83 | c = *a; *a = *b; *b = c;
|
---|
| 84 |
|
---|
| 85 | if (NTYPE(a) == NT_STR) {
|
---|
| 86 | StrNode* sn = NSTR(a);
|
---|
| 87 | if (sn->capa == 0) {
|
---|
| 88 | size_t len = sn->end - sn->s;
|
---|
| 89 | sn->s = sn->buf;
|
---|
| 90 | sn->end = sn->s + len;
|
---|
| 91 | }
|
---|
| 92 | }
|
---|
| 93 |
|
---|
| 94 | if (NTYPE(b) == NT_STR) {
|
---|
| 95 | StrNode* sn = NSTR(b);
|
---|
| 96 | if (sn->capa == 0) {
|
---|
| 97 | size_t len = sn->end - sn->s;
|
---|
| 98 | sn->s = sn->buf;
|
---|
| 99 | sn->end = sn->s + len;
|
---|
| 100 | }
|
---|
| 101 | }
|
---|
| 102 | }
|
---|
| 103 |
|
---|
| 104 | static OnigDistance
|
---|
| 105 | distance_add(OnigDistance d1, OnigDistance d2)
|
---|
| 106 | {
|
---|
| 107 | if (d1 == ONIG_INFINITE_DISTANCE || d2 == ONIG_INFINITE_DISTANCE)
|
---|
| 108 | return ONIG_INFINITE_DISTANCE;
|
---|
| 109 | else {
|
---|
| 110 | if (d1 <= ONIG_INFINITE_DISTANCE - d2) return d1 + d2;
|
---|
| 111 | else return ONIG_INFINITE_DISTANCE;
|
---|
| 112 | }
|
---|
| 113 | }
|
---|
| 114 |
|
---|
| 115 | static OnigDistance
|
---|
| 116 | distance_multiply(OnigDistance d, int m)
|
---|
| 117 | {
|
---|
| 118 | if (m == 0) return 0;
|
---|
| 119 |
|
---|
| 120 | if (d < ONIG_INFINITE_DISTANCE / m)
|
---|
| 121 | return d * m;
|
---|
| 122 | else
|
---|
| 123 | return ONIG_INFINITE_DISTANCE;
|
---|
| 124 | }
|
---|
| 125 |
|
---|
| 126 | static int
|
---|
| 127 | bitset_is_empty(BitSetRef bs)
|
---|
| 128 | {
|
---|
| 129 | int i;
|
---|
| 130 | for (i = 0; i < BITSET_SIZE; i++) {
|
---|
| 131 | if (bs[i] != 0) return 0;
|
---|
| 132 | }
|
---|
| 133 | return 1;
|
---|
| 134 | }
|
---|
| 135 |
|
---|
| 136 | #ifdef ONIG_DEBUG
|
---|
| 137 | static int
|
---|
| 138 | bitset_on_num(BitSetRef bs)
|
---|
| 139 | {
|
---|
| 140 | int i, n;
|
---|
| 141 |
|
---|
| 142 | n = 0;
|
---|
| 143 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 144 | if (BITSET_AT(bs, i)) n++;
|
---|
| 145 | }
|
---|
| 146 | return n;
|
---|
| 147 | }
|
---|
| 148 | #endif
|
---|
| 149 |
|
---|
| 150 | extern int
|
---|
| 151 | onig_bbuf_init(BBuf* buf, OnigDistance size)
|
---|
| 152 | {
|
---|
| 153 | if (size <= 0) {
|
---|
| 154 | size = 0;
|
---|
| 155 | buf->p = NULL;
|
---|
| 156 | }
|
---|
| 157 | else {
|
---|
| 158 | buf->p = (UChar* )xmalloc(size);
|
---|
| 159 | if (IS_NULL(buf->p)) return(ONIGERR_MEMORY);
|
---|
| 160 | }
|
---|
| 161 |
|
---|
| 162 | buf->alloc = (unsigned int )size;
|
---|
| 163 | buf->used = 0;
|
---|
| 164 | return 0;
|
---|
| 165 | }
|
---|
| 166 |
|
---|
| 167 |
|
---|
| 168 | #ifdef USE_SUBEXP_CALL
|
---|
| 169 |
|
---|
| 170 | static int
|
---|
| 171 | unset_addr_list_init(UnsetAddrList* uslist, int size)
|
---|
| 172 | {
|
---|
| 173 | UnsetAddr* p;
|
---|
| 174 |
|
---|
| 175 | p = (UnsetAddr* )xmalloc(sizeof(UnsetAddr)* size);
|
---|
| 176 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 177 | uslist->num = 0;
|
---|
| 178 | uslist->alloc = size;
|
---|
| 179 | uslist->us = p;
|
---|
| 180 | return 0;
|
---|
| 181 | }
|
---|
| 182 |
|
---|
| 183 | static void
|
---|
| 184 | unset_addr_list_end(UnsetAddrList* uslist)
|
---|
| 185 | {
|
---|
| 186 | if (IS_NOT_NULL(uslist->us))
|
---|
| 187 | xfree(uslist->us);
|
---|
| 188 | }
|
---|
| 189 |
|
---|
| 190 | static int
|
---|
| 191 | unset_addr_list_add(UnsetAddrList* uslist, int offset, struct _Node* node)
|
---|
| 192 | {
|
---|
| 193 | UnsetAddr* p;
|
---|
| 194 | int size;
|
---|
| 195 |
|
---|
| 196 | if (uslist->num >= uslist->alloc) {
|
---|
| 197 | size = uslist->alloc * 2;
|
---|
| 198 | p = (UnsetAddr* )xrealloc(uslist->us, sizeof(UnsetAddr) * size);
|
---|
| 199 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 200 | uslist->alloc = size;
|
---|
| 201 | uslist->us = p;
|
---|
| 202 | }
|
---|
| 203 |
|
---|
| 204 | uslist->us[uslist->num].offset = offset;
|
---|
| 205 | uslist->us[uslist->num].target = node;
|
---|
| 206 | uslist->num++;
|
---|
| 207 | return 0;
|
---|
| 208 | }
|
---|
| 209 | #endif /* USE_SUBEXP_CALL */
|
---|
| 210 |
|
---|
| 211 |
|
---|
| 212 | static int
|
---|
| 213 | add_opcode(regex_t* reg, int opcode)
|
---|
| 214 | {
|
---|
| 215 | BBUF_ADD1(reg, opcode);
|
---|
| 216 | return 0;
|
---|
| 217 | }
|
---|
| 218 |
|
---|
| 219 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 220 | static int
|
---|
| 221 | add_state_check_num(regex_t* reg, int num)
|
---|
| 222 | {
|
---|
| 223 | StateCheckNumType n = (StateCheckNumType )num;
|
---|
| 224 |
|
---|
| 225 | BBUF_ADD(reg, &n, SIZE_STATE_CHECK_NUM);
|
---|
| 226 | return 0;
|
---|
| 227 | }
|
---|
| 228 | #endif
|
---|
| 229 |
|
---|
| 230 | static int
|
---|
| 231 | add_rel_addr(regex_t* reg, int addr)
|
---|
| 232 | {
|
---|
| 233 | RelAddrType ra = (RelAddrType )addr;
|
---|
| 234 |
|
---|
| 235 | BBUF_ADD(reg, &ra, SIZE_RELADDR);
|
---|
| 236 | return 0;
|
---|
| 237 | }
|
---|
| 238 |
|
---|
| 239 | static int
|
---|
| 240 | add_abs_addr(regex_t* reg, int addr)
|
---|
| 241 | {
|
---|
| 242 | AbsAddrType ra = (AbsAddrType )addr;
|
---|
| 243 |
|
---|
| 244 | BBUF_ADD(reg, &ra, SIZE_ABSADDR);
|
---|
| 245 | return 0;
|
---|
| 246 | }
|
---|
| 247 |
|
---|
| 248 | static int
|
---|
| 249 | add_length(regex_t* reg, OnigDistance len)
|
---|
| 250 | {
|
---|
| 251 | LengthType l = (LengthType )len;
|
---|
| 252 |
|
---|
| 253 | BBUF_ADD(reg, &l, SIZE_LENGTH);
|
---|
| 254 | return 0;
|
---|
| 255 | }
|
---|
| 256 |
|
---|
| 257 | static int
|
---|
| 258 | add_mem_num(regex_t* reg, int num)
|
---|
| 259 | {
|
---|
| 260 | MemNumType n = (MemNumType )num;
|
---|
| 261 |
|
---|
| 262 | BBUF_ADD(reg, &n, SIZE_MEMNUM);
|
---|
| 263 | return 0;
|
---|
| 264 | }
|
---|
| 265 |
|
---|
| 266 | static int
|
---|
| 267 | add_pointer(regex_t* reg, void* addr)
|
---|
| 268 | {
|
---|
| 269 | PointerType ptr = (PointerType )addr;
|
---|
| 270 |
|
---|
| 271 | BBUF_ADD(reg, &ptr, SIZE_POINTER);
|
---|
| 272 | return 0;
|
---|
| 273 | }
|
---|
| 274 |
|
---|
| 275 | static int
|
---|
| 276 | add_option(regex_t* reg, OnigOptionType option)
|
---|
| 277 | {
|
---|
| 278 | BBUF_ADD(reg, &option, SIZE_OPTION);
|
---|
| 279 | return 0;
|
---|
| 280 | }
|
---|
| 281 |
|
---|
| 282 | static int
|
---|
| 283 | add_opcode_rel_addr(regex_t* reg, int opcode, int addr)
|
---|
| 284 | {
|
---|
| 285 | int r;
|
---|
| 286 |
|
---|
| 287 | r = add_opcode(reg, opcode);
|
---|
| 288 | if (r) return r;
|
---|
| 289 | r = add_rel_addr(reg, addr);
|
---|
| 290 | return r;
|
---|
| 291 | }
|
---|
| 292 |
|
---|
| 293 | static int
|
---|
| 294 | add_bytes(regex_t* reg, UChar* bytes, OnigDistance len)
|
---|
| 295 | {
|
---|
| 296 | BBUF_ADD(reg, bytes, len);
|
---|
| 297 | return 0;
|
---|
| 298 | }
|
---|
| 299 |
|
---|
| 300 | static int
|
---|
| 301 | add_bitset(regex_t* reg, BitSetRef bs)
|
---|
| 302 | {
|
---|
| 303 | BBUF_ADD(reg, bs, SIZE_BITSET);
|
---|
| 304 | return 0;
|
---|
| 305 | }
|
---|
| 306 |
|
---|
| 307 | static int
|
---|
| 308 | add_opcode_option(regex_t* reg, int opcode, OnigOptionType option)
|
---|
| 309 | {
|
---|
| 310 | int r;
|
---|
| 311 |
|
---|
| 312 | r = add_opcode(reg, opcode);
|
---|
| 313 | if (r) return r;
|
---|
| 314 | r = add_option(reg, option);
|
---|
| 315 | return r;
|
---|
| 316 | }
|
---|
| 317 |
|
---|
| 318 | static int compile_length_tree(Node* node, regex_t* reg);
|
---|
| 319 | static int compile_tree(Node* node, regex_t* reg);
|
---|
| 320 |
|
---|
| 321 |
|
---|
/* Non-zero for the EXACT* opcodes that are followed by a length operand. */
#define IS_NEED_STR_LEN_OP_EXACT(op) \
   ((op) == OP_EXACTN    || (op) == OP_EXACTMB2N || \
    (op) == OP_EXACTMB3N || (op) == OP_EXACTMBN  || \
    (op) == OP_EXACTN_IC)
|
---|
| 325 |
|
---|
| 326 | static int
|
---|
| 327 | select_str_opcode(int mb_len, OnigDistance byte_len, int ignore_case)
|
---|
| 328 | {
|
---|
| 329 | int op;
|
---|
| 330 | OnigDistance str_len = (byte_len + mb_len - 1) / mb_len;
|
---|
| 331 |
|
---|
| 332 | if (ignore_case) {
|
---|
| 333 | switch (str_len) {
|
---|
| 334 | case 1: op = OP_EXACT1_IC; break;
|
---|
| 335 | default: op = OP_EXACTN_IC; break;
|
---|
| 336 | }
|
---|
| 337 | }
|
---|
| 338 | else {
|
---|
| 339 | switch (mb_len) {
|
---|
| 340 | case 1:
|
---|
| 341 | switch (str_len) {
|
---|
| 342 | case 1: op = OP_EXACT1; break;
|
---|
| 343 | case 2: op = OP_EXACT2; break;
|
---|
| 344 | case 3: op = OP_EXACT3; break;
|
---|
| 345 | case 4: op = OP_EXACT4; break;
|
---|
| 346 | case 5: op = OP_EXACT5; break;
|
---|
| 347 | default: op = OP_EXACTN; break;
|
---|
| 348 | }
|
---|
| 349 | break;
|
---|
| 350 |
|
---|
| 351 | case 2:
|
---|
| 352 | switch (str_len) {
|
---|
| 353 | case 1: op = OP_EXACTMB2N1; break;
|
---|
| 354 | case 2: op = OP_EXACTMB2N2; break;
|
---|
| 355 | case 3: op = OP_EXACTMB2N3; break;
|
---|
| 356 | default: op = OP_EXACTMB2N; break;
|
---|
| 357 | }
|
---|
| 358 | break;
|
---|
| 359 |
|
---|
| 360 | case 3:
|
---|
| 361 | op = OP_EXACTMB3N;
|
---|
| 362 | break;
|
---|
| 363 |
|
---|
| 364 | default:
|
---|
| 365 | op = OP_EXACTMBN;
|
---|
| 366 | break;
|
---|
| 367 | }
|
---|
| 368 | }
|
---|
| 369 | return op;
|
---|
| 370 | }
|
---|
| 371 |
|
---|
| 372 | static int
|
---|
| 373 | compile_tree_empty_check(Node* node, regex_t* reg, int empty_info)
|
---|
| 374 | {
|
---|
| 375 | int r;
|
---|
| 376 | int saved_num_null_check = reg->num_null_check;
|
---|
| 377 |
|
---|
| 378 | if (empty_info != 0) {
|
---|
| 379 | r = add_opcode(reg, OP_NULL_CHECK_START);
|
---|
| 380 | if (r) return r;
|
---|
| 381 | r = add_mem_num(reg, reg->num_null_check); /* NULL CHECK ID */
|
---|
| 382 | if (r) return r;
|
---|
| 383 | reg->num_null_check++;
|
---|
| 384 | }
|
---|
| 385 |
|
---|
| 386 | r = compile_tree(node, reg);
|
---|
| 387 | if (r) return r;
|
---|
| 388 |
|
---|
| 389 | if (empty_info != 0) {
|
---|
| 390 | if (empty_info == NQ_TARGET_IS_EMPTY)
|
---|
| 391 | r = add_opcode(reg, OP_NULL_CHECK_END);
|
---|
| 392 | else if (empty_info == NQ_TARGET_IS_EMPTY_MEM)
|
---|
| 393 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST);
|
---|
| 394 | else if (empty_info == NQ_TARGET_IS_EMPTY_REC)
|
---|
| 395 | r = add_opcode(reg, OP_NULL_CHECK_END_MEMST_PUSH);
|
---|
| 396 |
|
---|
| 397 | if (r) return r;
|
---|
| 398 | r = add_mem_num(reg, saved_num_null_check); /* NULL CHECK ID */
|
---|
| 399 | }
|
---|
| 400 | return r;
|
---|
| 401 | }
|
---|
| 402 |
|
---|
| 403 | #ifdef USE_SUBEXP_CALL
|
---|
| 404 | static int
|
---|
| 405 | compile_call(CallNode* node, regex_t* reg)
|
---|
| 406 | {
|
---|
| 407 | int r;
|
---|
| 408 |
|
---|
| 409 | r = add_opcode(reg, OP_CALL);
|
---|
| 410 | if (r) return r;
|
---|
| 411 | r = unset_addr_list_add(node->unset_addr_list, BBUF_GET_OFFSET_POS(reg),
|
---|
| 412 | node->target);
|
---|
| 413 | if (r) return r;
|
---|
| 414 | r = add_abs_addr(reg, 0 /*dummy addr.*/);
|
---|
| 415 | return r;
|
---|
| 416 | }
|
---|
| 417 | #endif
|
---|
| 418 |
|
---|
| 419 | static int
|
---|
| 420 | compile_tree_n_times(Node* node, int n, regex_t* reg)
|
---|
| 421 | {
|
---|
| 422 | int i, r;
|
---|
| 423 |
|
---|
| 424 | for (i = 0; i < n; i++) {
|
---|
| 425 | r = compile_tree(node, reg);
|
---|
| 426 | if (r) return r;
|
---|
| 427 | }
|
---|
| 428 | return 0;
|
---|
| 429 | }
|
---|
| 430 |
|
---|
| 431 | static int
|
---|
| 432 | add_compile_string_length(UChar* s ARG_UNUSED, int mb_len, OnigDistance byte_len,
|
---|
| 433 | regex_t* reg ARG_UNUSED, int ignore_case)
|
---|
| 434 | {
|
---|
| 435 | int len;
|
---|
| 436 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
| 437 |
|
---|
| 438 | len = SIZE_OPCODE;
|
---|
| 439 |
|
---|
| 440 | if (op == OP_EXACTMBN) len += SIZE_LENGTH;
|
---|
| 441 | if (IS_NEED_STR_LEN_OP_EXACT(op))
|
---|
| 442 | len += SIZE_LENGTH;
|
---|
| 443 |
|
---|
| 444 | len += (int )byte_len;
|
---|
| 445 | return len;
|
---|
| 446 | }
|
---|
| 447 |
|
---|
| 448 | static int
|
---|
| 449 | add_compile_string(UChar* s, int mb_len, OnigDistance byte_len,
|
---|
| 450 | regex_t* reg, int ignore_case)
|
---|
| 451 | {
|
---|
| 452 | int op = select_str_opcode(mb_len, byte_len, ignore_case);
|
---|
| 453 | add_opcode(reg, op);
|
---|
| 454 |
|
---|
| 455 | if (op == OP_EXACTMBN)
|
---|
| 456 | add_length(reg, mb_len);
|
---|
| 457 |
|
---|
| 458 | if (IS_NEED_STR_LEN_OP_EXACT(op)) {
|
---|
| 459 | if (op == OP_EXACTN_IC)
|
---|
| 460 | add_length(reg, byte_len);
|
---|
| 461 | else
|
---|
| 462 | add_length(reg, byte_len / mb_len);
|
---|
| 463 | }
|
---|
| 464 |
|
---|
| 465 | add_bytes(reg, s, byte_len);
|
---|
| 466 | return 0;
|
---|
| 467 | }
|
---|
| 468 |
|
---|
| 469 |
|
---|
| 470 | static int
|
---|
| 471 | compile_length_string_node(Node* node, regex_t* reg)
|
---|
| 472 | {
|
---|
| 473 | int rlen, r, len, prev_len, blen, ambig;
|
---|
| 474 | OnigEncoding enc = reg->enc;
|
---|
| 475 | UChar *p, *prev;
|
---|
| 476 | StrNode* sn;
|
---|
| 477 |
|
---|
| 478 | sn = NSTR(node);
|
---|
| 479 | if (sn->end <= sn->s)
|
---|
| 480 | return 0;
|
---|
| 481 |
|
---|
| 482 | ambig = NSTRING_IS_AMBIG(node);
|
---|
| 483 |
|
---|
| 484 | p = prev = sn->s;
|
---|
| 485 | prev_len = enclen(enc, p);
|
---|
| 486 | p += prev_len;
|
---|
| 487 | blen = prev_len;
|
---|
| 488 | rlen = 0;
|
---|
| 489 |
|
---|
| 490 | for (; p < sn->end; ) {
|
---|
| 491 | len = enclen(enc, p);
|
---|
| 492 | if (len == prev_len || ambig) {
|
---|
| 493 | blen += len;
|
---|
| 494 | }
|
---|
| 495 | else {
|
---|
| 496 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
| 497 | rlen += r;
|
---|
| 498 | prev = p;
|
---|
| 499 | blen = len;
|
---|
| 500 | prev_len = len;
|
---|
| 501 | }
|
---|
| 502 | p += len;
|
---|
| 503 | }
|
---|
| 504 | r = add_compile_string_length(prev, prev_len, blen, reg, ambig);
|
---|
| 505 | rlen += r;
|
---|
| 506 | return rlen;
|
---|
| 507 | }
|
---|
| 508 |
|
---|
| 509 | static int
|
---|
| 510 | compile_length_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
| 511 | {
|
---|
| 512 | if (sn->end <= sn->s)
|
---|
| 513 | return 0;
|
---|
| 514 |
|
---|
| 515 | return add_compile_string_length(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
| 516 | }
|
---|
| 517 |
|
---|
| 518 | static int
|
---|
| 519 | compile_string_node(Node* node, regex_t* reg)
|
---|
| 520 | {
|
---|
| 521 | int r, len, prev_len, blen, ambig;
|
---|
| 522 | OnigEncoding enc = reg->enc;
|
---|
| 523 | UChar *p, *prev, *end;
|
---|
| 524 | StrNode* sn;
|
---|
| 525 |
|
---|
| 526 | sn = NSTR(node);
|
---|
| 527 | if (sn->end <= sn->s)
|
---|
| 528 | return 0;
|
---|
| 529 |
|
---|
| 530 | end = sn->end;
|
---|
| 531 | ambig = NSTRING_IS_AMBIG(node);
|
---|
| 532 |
|
---|
| 533 | p = prev = sn->s;
|
---|
| 534 | prev_len = enclen(enc, p);
|
---|
| 535 | p += prev_len;
|
---|
| 536 | blen = prev_len;
|
---|
| 537 |
|
---|
| 538 | for (; p < end; ) {
|
---|
| 539 | len = enclen(enc, p);
|
---|
| 540 | if (len == prev_len || ambig) {
|
---|
| 541 | blen += len;
|
---|
| 542 | }
|
---|
| 543 | else {
|
---|
| 544 | r = add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
| 545 | if (r) return r;
|
---|
| 546 |
|
---|
| 547 | prev = p;
|
---|
| 548 | blen = len;
|
---|
| 549 | prev_len = len;
|
---|
| 550 | }
|
---|
| 551 |
|
---|
| 552 | p += len;
|
---|
| 553 | }
|
---|
| 554 | return add_compile_string(prev, prev_len, blen, reg, ambig);
|
---|
| 555 | }
|
---|
| 556 |
|
---|
| 557 | static int
|
---|
| 558 | compile_string_raw_node(StrNode* sn, regex_t* reg)
|
---|
| 559 | {
|
---|
| 560 | if (sn->end <= sn->s)
|
---|
| 561 | return 0;
|
---|
| 562 |
|
---|
| 563 | return add_compile_string(sn->s, 1 /* sb */, sn->end - sn->s, reg, 0);
|
---|
| 564 | }
|
---|
| 565 |
|
---|
| 566 | static int
|
---|
| 567 | add_multi_byte_cclass(BBuf* mbuf, regex_t* reg)
|
---|
| 568 | {
|
---|
| 569 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 570 | add_length(reg, mbuf->used);
|
---|
| 571 | return add_bytes(reg, mbuf->p, mbuf->used);
|
---|
| 572 | #else
|
---|
| 573 | int r, pad_size;
|
---|
| 574 | UChar* p = BBUF_GET_ADD_ADDRESS(reg) + SIZE_LENGTH;
|
---|
| 575 |
|
---|
| 576 | GET_ALIGNMENT_PAD_SIZE(p, pad_size);
|
---|
| 577 | add_length(reg, mbuf->used + (WORD_ALIGNMENT_SIZE - 1));
|
---|
| 578 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
| 579 |
|
---|
| 580 | r = add_bytes(reg, mbuf->p, mbuf->used);
|
---|
| 581 |
|
---|
| 582 | /* padding for return value from compile_length_cclass_node() to be fix. */
|
---|
| 583 | pad_size = (WORD_ALIGNMENT_SIZE - 1) - pad_size;
|
---|
| 584 | if (pad_size != 0) add_bytes(reg, PadBuf, pad_size);
|
---|
| 585 | return r;
|
---|
| 586 | #endif
|
---|
| 587 | }
|
---|
| 588 |
|
---|
| 589 | static int
|
---|
| 590 | compile_length_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
| 591 | {
|
---|
| 592 | int len;
|
---|
| 593 |
|
---|
| 594 | if (IS_NCCLASS_SHARE(cc)) {
|
---|
| 595 | len = SIZE_OPCODE + SIZE_POINTER;
|
---|
| 596 | return len;
|
---|
| 597 | }
|
---|
| 598 |
|
---|
| 599 | if (IS_NULL(cc->mbuf)) {
|
---|
| 600 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
| 601 | }
|
---|
| 602 | else {
|
---|
| 603 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
| 604 | len = SIZE_OPCODE;
|
---|
| 605 | }
|
---|
| 606 | else {
|
---|
| 607 | len = SIZE_OPCODE + SIZE_BITSET;
|
---|
| 608 | }
|
---|
| 609 | #ifdef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 610 | len += SIZE_LENGTH + cc->mbuf->used;
|
---|
| 611 | #else
|
---|
| 612 | len += SIZE_LENGTH + cc->mbuf->used + (WORD_ALIGNMENT_SIZE - 1);
|
---|
| 613 | #endif
|
---|
| 614 | }
|
---|
| 615 |
|
---|
| 616 | return len;
|
---|
| 617 | }
|
---|
| 618 |
|
---|
| 619 | static int
|
---|
| 620 | compile_cclass_node(CClassNode* cc, regex_t* reg)
|
---|
| 621 | {
|
---|
| 622 | int r;
|
---|
| 623 |
|
---|
| 624 | if (IS_NCCLASS_SHARE(cc)) {
|
---|
| 625 | add_opcode(reg, OP_CCLASS_NODE);
|
---|
| 626 | r = add_pointer(reg, cc);
|
---|
| 627 | return r;
|
---|
| 628 | }
|
---|
| 629 |
|
---|
| 630 | if (IS_NULL(cc->mbuf)) {
|
---|
| 631 | if (IS_NCCLASS_NOT(cc))
|
---|
| 632 | add_opcode(reg, OP_CCLASS_NOT);
|
---|
| 633 | else
|
---|
| 634 | add_opcode(reg, OP_CCLASS);
|
---|
| 635 |
|
---|
| 636 | r = add_bitset(reg, cc->bs);
|
---|
| 637 | }
|
---|
| 638 | else {
|
---|
| 639 | if (ONIGENC_MBC_MINLEN(reg->enc) > 1 || bitset_is_empty(cc->bs)) {
|
---|
| 640 | if (IS_NCCLASS_NOT(cc))
|
---|
| 641 | add_opcode(reg, OP_CCLASS_MB_NOT);
|
---|
| 642 | else
|
---|
| 643 | add_opcode(reg, OP_CCLASS_MB);
|
---|
| 644 |
|
---|
| 645 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
| 646 | }
|
---|
| 647 | else {
|
---|
| 648 | if (IS_NCCLASS_NOT(cc))
|
---|
| 649 | add_opcode(reg, OP_CCLASS_MIX_NOT);
|
---|
| 650 | else
|
---|
| 651 | add_opcode(reg, OP_CCLASS_MIX);
|
---|
| 652 |
|
---|
| 653 | r = add_bitset(reg, cc->bs);
|
---|
| 654 | if (r) return r;
|
---|
| 655 | r = add_multi_byte_cclass(cc->mbuf, reg);
|
---|
| 656 | }
|
---|
| 657 | }
|
---|
| 658 |
|
---|
| 659 | return r;
|
---|
| 660 | }
|
---|
| 661 |
|
---|
| 662 | static int
|
---|
| 663 | entry_repeat_range(regex_t* reg, int id, int lower, int upper)
|
---|
| 664 | {
|
---|
| 665 | #define REPEAT_RANGE_ALLOC 4
|
---|
| 666 |
|
---|
| 667 | OnigRepeatRange* p;
|
---|
| 668 |
|
---|
| 669 | if (reg->repeat_range_alloc == 0) {
|
---|
| 670 | p = (OnigRepeatRange* )xmalloc(sizeof(OnigRepeatRange) * REPEAT_RANGE_ALLOC);
|
---|
| 671 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 672 | reg->repeat_range = p;
|
---|
| 673 | reg->repeat_range_alloc = REPEAT_RANGE_ALLOC;
|
---|
| 674 | }
|
---|
| 675 | else if (reg->repeat_range_alloc <= id) {
|
---|
| 676 | int n;
|
---|
| 677 | n = reg->repeat_range_alloc + REPEAT_RANGE_ALLOC;
|
---|
| 678 | p = (OnigRepeatRange* )xrealloc(reg->repeat_range,
|
---|
| 679 | sizeof(OnigRepeatRange) * n);
|
---|
| 680 | CHECK_NULL_RETURN_MEMERR(p);
|
---|
| 681 | reg->repeat_range = p;
|
---|
| 682 | reg->repeat_range_alloc = n;
|
---|
| 683 | }
|
---|
| 684 | else {
|
---|
| 685 | p = reg->repeat_range;
|
---|
| 686 | }
|
---|
| 687 |
|
---|
| 688 | p[id].lower = lower;
|
---|
| 689 | p[id].upper = (IS_REPEAT_INFINITE(upper) ? 0x7fffffff : upper);
|
---|
| 690 | return 0;
|
---|
| 691 | }
|
---|
| 692 |
|
---|
| 693 | static int
|
---|
| 694 | compile_range_repeat_node(QtfrNode* qn, int target_len, int empty_info,
|
---|
| 695 | regex_t* reg)
|
---|
| 696 | {
|
---|
| 697 | int r;
|
---|
| 698 | int num_repeat = reg->num_repeat;
|
---|
| 699 |
|
---|
| 700 | r = add_opcode(reg, qn->greedy ? OP_REPEAT : OP_REPEAT_NG);
|
---|
| 701 | if (r) return r;
|
---|
| 702 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
| 703 | reg->num_repeat++;
|
---|
| 704 | if (r) return r;
|
---|
| 705 | r = add_rel_addr(reg, target_len + SIZE_OP_REPEAT_INC);
|
---|
| 706 | if (r) return r;
|
---|
| 707 |
|
---|
| 708 | r = entry_repeat_range(reg, num_repeat, qn->lower, qn->upper);
|
---|
| 709 | if (r) return r;
|
---|
| 710 |
|
---|
| 711 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 712 | if (r) return r;
|
---|
| 713 |
|
---|
| 714 | if (
|
---|
| 715 | #ifdef USE_SUBEXP_CALL
|
---|
| 716 | reg->num_call > 0 ||
|
---|
| 717 | #endif
|
---|
| 718 | IS_QUANTIFIER_IN_REPEAT(qn)) {
|
---|
| 719 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC_SG : OP_REPEAT_INC_NG_SG);
|
---|
| 720 | }
|
---|
| 721 | else {
|
---|
| 722 | r = add_opcode(reg, qn->greedy ? OP_REPEAT_INC : OP_REPEAT_INC_NG);
|
---|
| 723 | }
|
---|
| 724 | if (r) return r;
|
---|
| 725 | r = add_mem_num(reg, num_repeat); /* OP_REPEAT ID */
|
---|
| 726 | return r;
|
---|
| 727 | }
|
---|
| 728 |
|
---|
| 729 | static int
|
---|
| 730 | is_anychar_star_quantifier(QtfrNode* qn)
|
---|
| 731 | {
|
---|
| 732 | if (qn->greedy && IS_REPEAT_INFINITE(qn->upper) &&
|
---|
| 733 | NTYPE(qn->target) == NT_CANY)
|
---|
| 734 | return 1;
|
---|
| 735 | else
|
---|
| 736 | return 0;
|
---|
| 737 | }
|
---|
| 738 |
|
---|
| 739 | #define QUANTIFIER_EXPAND_LIMIT_SIZE 50
|
---|
| 740 | #define CKN_ON (ckn > 0)
|
---|
| 741 |
|
---|
| 742 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 743 |
|
---|
| 744 | static int
|
---|
| 745 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 746 | {
|
---|
| 747 | int len, mod_tlen, cklen;
|
---|
| 748 | int ckn;
|
---|
| 749 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 750 | int empty_info = qn->target_empty_info;
|
---|
| 751 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 752 |
|
---|
| 753 | if (tlen < 0) return tlen;
|
---|
| 754 |
|
---|
| 755 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
| 756 |
|
---|
| 757 | cklen = (CKN_ON ? SIZE_STATE_CHECK_NUM: 0);
|
---|
| 758 |
|
---|
| 759 | /* anychar repeat */
|
---|
| 760 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
| 761 | if (qn->greedy && infinite) {
|
---|
| 762 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON)
|
---|
| 763 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower + cklen;
|
---|
| 764 | else
|
---|
| 765 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower + cklen;
|
---|
| 766 | }
|
---|
| 767 | }
|
---|
| 768 |
|
---|
| 769 | if (empty_info != 0)
|
---|
| 770 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 771 | else
|
---|
| 772 | mod_tlen = tlen;
|
---|
| 773 |
|
---|
| 774 | if (infinite && qn->lower <= 1) {
|
---|
| 775 | if (qn->greedy) {
|
---|
| 776 | if (qn->lower == 1)
|
---|
| 777 | len = SIZE_OP_JUMP;
|
---|
| 778 | else
|
---|
| 779 | len = 0;
|
---|
| 780 |
|
---|
| 781 | len += SIZE_OP_PUSH + cklen + mod_tlen + SIZE_OP_JUMP;
|
---|
| 782 | }
|
---|
| 783 | else {
|
---|
| 784 | if (qn->lower == 0)
|
---|
| 785 | len = SIZE_OP_JUMP;
|
---|
| 786 | else
|
---|
| 787 | len = 0;
|
---|
| 788 |
|
---|
| 789 | len += mod_tlen + SIZE_OP_PUSH + cklen;
|
---|
| 790 | }
|
---|
| 791 | }
|
---|
| 792 | else if (qn->upper == 0) {
|
---|
| 793 | if (qn->is_refered != 0) /* /(?<n>..){0}/ */
|
---|
| 794 | len = SIZE_OP_JUMP + tlen;
|
---|
| 795 | else
|
---|
| 796 | len = 0;
|
---|
| 797 | }
|
---|
| 798 | else if (qn->upper == 1 && qn->greedy) {
|
---|
| 799 | if (qn->lower == 0) {
|
---|
| 800 | if (CKN_ON) {
|
---|
| 801 | len = SIZE_OP_STATE_CHECK_PUSH + tlen;
|
---|
| 802 | }
|
---|
| 803 | else {
|
---|
| 804 | len = SIZE_OP_PUSH + tlen;
|
---|
| 805 | }
|
---|
| 806 | }
|
---|
| 807 | else {
|
---|
| 808 | len = tlen;
|
---|
| 809 | }
|
---|
| 810 | }
|
---|
| 811 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 812 | len = SIZE_OP_PUSH + cklen + SIZE_OP_JUMP + tlen;
|
---|
| 813 | }
|
---|
| 814 | else {
|
---|
| 815 | len = SIZE_OP_REPEAT_INC
|
---|
| 816 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
| 817 | if (CKN_ON)
|
---|
| 818 | len += SIZE_OP_STATE_CHECK;
|
---|
| 819 | }
|
---|
| 820 |
|
---|
| 821 | return len;
|
---|
| 822 | }
|
---|
| 823 |
|
---|
| 824 | static int
|
---|
| 825 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 826 | {
|
---|
| 827 | int r, mod_tlen;
|
---|
| 828 | int ckn;
|
---|
| 829 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 830 | int empty_info = qn->target_empty_info;
|
---|
| 831 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 832 |
|
---|
| 833 | if (tlen < 0) return tlen;
|
---|
| 834 |
|
---|
| 835 | ckn = ((reg->num_comb_exp_check > 0) ? qn->comb_exp_check_num : 0);
|
---|
| 836 |
|
---|
| 837 | if (is_anychar_star_quantifier(qn)) {
|
---|
| 838 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 839 | if (r) return r;
|
---|
| 840 | if (IS_NOT_NULL(qn->next_head_exact) && !CKN_ON) {
|
---|
| 841 | if (IS_MULTILINE(reg->options))
|
---|
| 842 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
| 843 | else
|
---|
| 844 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
| 845 | if (r) return r;
|
---|
| 846 | if (CKN_ON) {
|
---|
| 847 | r = add_state_check_num(reg, ckn);
|
---|
| 848 | if (r) return r;
|
---|
| 849 | }
|
---|
| 850 |
|
---|
| 851 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
| 852 | }
|
---|
| 853 | else {
|
---|
| 854 | if (IS_MULTILINE(reg->options)) {
|
---|
| 855 | r = add_opcode(reg, (CKN_ON ?
|
---|
| 856 | OP_STATE_CHECK_ANYCHAR_ML_STAR
|
---|
| 857 | : OP_ANYCHAR_ML_STAR));
|
---|
| 858 | }
|
---|
| 859 | else {
|
---|
| 860 | r = add_opcode(reg, (CKN_ON ?
|
---|
| 861 | OP_STATE_CHECK_ANYCHAR_STAR
|
---|
| 862 | : OP_ANYCHAR_STAR));
|
---|
| 863 | }
|
---|
| 864 | if (r) return r;
|
---|
| 865 | if (CKN_ON)
|
---|
| 866 | r = add_state_check_num(reg, ckn);
|
---|
| 867 |
|
---|
| 868 | return r;
|
---|
| 869 | }
|
---|
| 870 | }
|
---|
| 871 |
|
---|
| 872 | if (empty_info != 0)
|
---|
| 873 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 874 | else
|
---|
| 875 | mod_tlen = tlen;
|
---|
| 876 |
|
---|
| 877 | if (infinite && qn->lower <= 1) {
|
---|
| 878 | if (qn->greedy) {
|
---|
| 879 | if (qn->lower == 1) {
|
---|
| 880 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 881 | (CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH));
|
---|
| 882 | if (r) return r;
|
---|
| 883 | }
|
---|
| 884 |
|
---|
| 885 | if (CKN_ON) {
|
---|
| 886 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
| 887 | if (r) return r;
|
---|
| 888 | r = add_state_check_num(reg, ckn);
|
---|
| 889 | if (r) return r;
|
---|
| 890 | r = add_rel_addr(reg, mod_tlen + SIZE_OP_JUMP);
|
---|
| 891 | }
|
---|
| 892 | else {
|
---|
| 893 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
| 894 | }
|
---|
| 895 | if (r) return r;
|
---|
| 896 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 897 | if (r) return r;
|
---|
| 898 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 899 | -(mod_tlen + (int )SIZE_OP_JUMP
|
---|
| 900 | + (int )(CKN_ON ? SIZE_OP_STATE_CHECK_PUSH : SIZE_OP_PUSH)));
|
---|
| 901 | }
|
---|
| 902 | else {
|
---|
| 903 | if (qn->lower == 0) {
|
---|
| 904 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
| 905 | if (r) return r;
|
---|
| 906 | }
|
---|
| 907 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 908 | if (r) return r;
|
---|
| 909 | if (CKN_ON) {
|
---|
| 910 | r = add_opcode(reg, OP_STATE_CHECK_PUSH_OR_JUMP);
|
---|
| 911 | if (r) return r;
|
---|
| 912 | r = add_state_check_num(reg, ckn);
|
---|
| 913 | if (r) return r;
|
---|
| 914 | r = add_rel_addr(reg,
|
---|
| 915 | -(mod_tlen + (int )SIZE_OP_STATE_CHECK_PUSH_OR_JUMP));
|
---|
| 916 | }
|
---|
| 917 | else
|
---|
| 918 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
| 919 | }
|
---|
| 920 | }
|
---|
| 921 | else if (qn->upper == 0) {
|
---|
| 922 | if (qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
| 923 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 924 | if (r) return r;
|
---|
| 925 | r = compile_tree(qn->target, reg);
|
---|
| 926 | }
|
---|
| 927 | else
|
---|
| 928 | r = 0;
|
---|
| 929 | }
|
---|
| 930 | else if (qn->upper == 1 && qn->greedy) {
|
---|
| 931 | if (qn->lower == 0) {
|
---|
| 932 | if (CKN_ON) {
|
---|
| 933 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
| 934 | if (r) return r;
|
---|
| 935 | r = add_state_check_num(reg, ckn);
|
---|
| 936 | if (r) return r;
|
---|
| 937 | r = add_rel_addr(reg, tlen);
|
---|
| 938 | }
|
---|
| 939 | else {
|
---|
| 940 | r = add_opcode_rel_addr(reg, OP_PUSH, tlen);
|
---|
| 941 | }
|
---|
| 942 | if (r) return r;
|
---|
| 943 | }
|
---|
| 944 |
|
---|
| 945 | r = compile_tree(qn->target, reg);
|
---|
| 946 | }
|
---|
| 947 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 948 | if (CKN_ON) {
|
---|
| 949 | r = add_opcode(reg, OP_STATE_CHECK_PUSH);
|
---|
| 950 | if (r) return r;
|
---|
| 951 | r = add_state_check_num(reg, ckn);
|
---|
| 952 | if (r) return r;
|
---|
| 953 | r = add_rel_addr(reg, SIZE_OP_JUMP);
|
---|
| 954 | }
|
---|
| 955 | else {
|
---|
| 956 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
| 957 | }
|
---|
| 958 |
|
---|
| 959 | if (r) return r;
|
---|
| 960 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 961 | if (r) return r;
|
---|
| 962 | r = compile_tree(qn->target, reg);
|
---|
| 963 | }
|
---|
| 964 | else {
|
---|
| 965 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
| 966 | if (CKN_ON) {
|
---|
| 967 | if (r) return r;
|
---|
| 968 | r = add_opcode(reg, OP_STATE_CHECK);
|
---|
| 969 | if (r) return r;
|
---|
| 970 | r = add_state_check_num(reg, ckn);
|
---|
| 971 | }
|
---|
| 972 | }
|
---|
| 973 | return r;
|
---|
| 974 | }
|
---|
| 975 |
|
---|
| 976 | #else /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
| 977 |
|
---|
| 978 | static int
|
---|
| 979 | compile_length_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 980 | {
|
---|
| 981 | int len, mod_tlen;
|
---|
| 982 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 983 | int empty_info = qn->target_empty_info;
|
---|
| 984 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 985 |
|
---|
| 986 | if (tlen < 0) return tlen;
|
---|
| 987 |
|
---|
| 988 | /* anychar repeat */
|
---|
| 989 | if (NTYPE(qn->target) == NT_CANY) {
|
---|
| 990 | if (qn->greedy && infinite) {
|
---|
| 991 | if (IS_NOT_NULL(qn->next_head_exact))
|
---|
| 992 | return SIZE_OP_ANYCHAR_STAR_PEEK_NEXT + tlen * qn->lower;
|
---|
| 993 | else
|
---|
| 994 | return SIZE_OP_ANYCHAR_STAR + tlen * qn->lower;
|
---|
| 995 | }
|
---|
| 996 | }
|
---|
| 997 |
|
---|
| 998 | if (empty_info != 0)
|
---|
| 999 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 1000 | else
|
---|
| 1001 | mod_tlen = tlen;
|
---|
| 1002 |
|
---|
| 1003 | if (infinite &&
|
---|
| 1004 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 1005 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
| 1006 | len = SIZE_OP_JUMP;
|
---|
| 1007 | }
|
---|
| 1008 | else {
|
---|
| 1009 | len = tlen * qn->lower;
|
---|
| 1010 | }
|
---|
| 1011 |
|
---|
| 1012 | if (qn->greedy) {
|
---|
| 1013 | if (IS_NOT_NULL(qn->head_exact))
|
---|
| 1014 | len += SIZE_OP_PUSH_OR_JUMP_EXACT1 + mod_tlen + SIZE_OP_JUMP;
|
---|
| 1015 | else if (IS_NOT_NULL(qn->next_head_exact))
|
---|
| 1016 | len += SIZE_OP_PUSH_IF_PEEK_NEXT + mod_tlen + SIZE_OP_JUMP;
|
---|
| 1017 | else
|
---|
| 1018 | len += SIZE_OP_PUSH + mod_tlen + SIZE_OP_JUMP;
|
---|
| 1019 | }
|
---|
| 1020 | else
|
---|
| 1021 | len += SIZE_OP_JUMP + mod_tlen + SIZE_OP_PUSH;
|
---|
| 1022 | }
|
---|
| 1023 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
| 1024 | len = SIZE_OP_JUMP + tlen;
|
---|
| 1025 | }
|
---|
| 1026 | else if (!infinite && qn->greedy &&
|
---|
| 1027 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
| 1028 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 1029 | len = tlen * qn->lower;
|
---|
| 1030 | len += (SIZE_OP_PUSH + tlen) * (qn->upper - qn->lower);
|
---|
| 1031 | }
|
---|
| 1032 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 1033 | len = SIZE_OP_PUSH + SIZE_OP_JUMP + tlen;
|
---|
| 1034 | }
|
---|
| 1035 | else {
|
---|
| 1036 | len = SIZE_OP_REPEAT_INC
|
---|
| 1037 | + mod_tlen + SIZE_OPCODE + SIZE_RELADDR + SIZE_MEMNUM;
|
---|
| 1038 | }
|
---|
| 1039 |
|
---|
| 1040 | return len;
|
---|
| 1041 | }
|
---|
| 1042 |
|
---|
| 1043 | static int
|
---|
| 1044 | compile_quantifier_node(QtfrNode* qn, regex_t* reg)
|
---|
| 1045 | {
|
---|
| 1046 | int i, r, mod_tlen;
|
---|
| 1047 | int infinite = IS_REPEAT_INFINITE(qn->upper);
|
---|
| 1048 | int empty_info = qn->target_empty_info;
|
---|
| 1049 | int tlen = compile_length_tree(qn->target, reg);
|
---|
| 1050 |
|
---|
| 1051 | if (tlen < 0) return tlen;
|
---|
| 1052 |
|
---|
| 1053 | if (is_anychar_star_quantifier(qn)) {
|
---|
| 1054 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1055 | if (r) return r;
|
---|
| 1056 | if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
| 1057 | if (IS_MULTILINE(reg->options))
|
---|
| 1058 | r = add_opcode(reg, OP_ANYCHAR_ML_STAR_PEEK_NEXT);
|
---|
| 1059 | else
|
---|
| 1060 | r = add_opcode(reg, OP_ANYCHAR_STAR_PEEK_NEXT);
|
---|
| 1061 | if (r) return r;
|
---|
| 1062 | return add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
| 1063 | }
|
---|
| 1064 | else {
|
---|
| 1065 | if (IS_MULTILINE(reg->options))
|
---|
| 1066 | return add_opcode(reg, OP_ANYCHAR_ML_STAR);
|
---|
| 1067 | else
|
---|
| 1068 | return add_opcode(reg, OP_ANYCHAR_STAR);
|
---|
| 1069 | }
|
---|
| 1070 | }
|
---|
| 1071 |
|
---|
| 1072 | if (empty_info != 0)
|
---|
| 1073 | mod_tlen = tlen + (SIZE_OP_NULL_CHECK_START + SIZE_OP_NULL_CHECK_END);
|
---|
| 1074 | else
|
---|
| 1075 | mod_tlen = tlen;
|
---|
| 1076 |
|
---|
| 1077 | if (infinite &&
|
---|
| 1078 | (qn->lower <= 1 || tlen * qn->lower <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 1079 | if (qn->lower == 1 && tlen > QUANTIFIER_EXPAND_LIMIT_SIZE) {
|
---|
| 1080 | if (qn->greedy) {
|
---|
| 1081 | if (IS_NOT_NULL(qn->head_exact))
|
---|
| 1082 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_OR_JUMP_EXACT1);
|
---|
| 1083 | else if (IS_NOT_NULL(qn->next_head_exact))
|
---|
| 1084 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH_IF_PEEK_NEXT);
|
---|
| 1085 | else
|
---|
| 1086 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_PUSH);
|
---|
| 1087 | }
|
---|
| 1088 | else {
|
---|
| 1089 | r = add_opcode_rel_addr(reg, OP_JUMP, SIZE_OP_JUMP);
|
---|
| 1090 | }
|
---|
| 1091 | if (r) return r;
|
---|
| 1092 | }
|
---|
| 1093 | else {
|
---|
| 1094 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1095 | if (r) return r;
|
---|
| 1096 | }
|
---|
| 1097 |
|
---|
| 1098 | if (qn->greedy) {
|
---|
| 1099 | if (IS_NOT_NULL(qn->head_exact)) {
|
---|
| 1100 | r = add_opcode_rel_addr(reg, OP_PUSH_OR_JUMP_EXACT1,
|
---|
| 1101 | mod_tlen + SIZE_OP_JUMP);
|
---|
| 1102 | if (r) return r;
|
---|
| 1103 | add_bytes(reg, NSTR(qn->head_exact)->s, 1);
|
---|
| 1104 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1105 | if (r) return r;
|
---|
| 1106 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1107 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_OR_JUMP_EXACT1));
|
---|
| 1108 | }
|
---|
| 1109 | else if (IS_NOT_NULL(qn->next_head_exact)) {
|
---|
| 1110 | r = add_opcode_rel_addr(reg, OP_PUSH_IF_PEEK_NEXT,
|
---|
| 1111 | mod_tlen + SIZE_OP_JUMP);
|
---|
| 1112 | if (r) return r;
|
---|
| 1113 | add_bytes(reg, NSTR(qn->next_head_exact)->s, 1);
|
---|
| 1114 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1115 | if (r) return r;
|
---|
| 1116 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1117 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH_IF_PEEK_NEXT));
|
---|
| 1118 | }
|
---|
| 1119 | else {
|
---|
| 1120 | r = add_opcode_rel_addr(reg, OP_PUSH, mod_tlen + SIZE_OP_JUMP);
|
---|
| 1121 | if (r) return r;
|
---|
| 1122 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1123 | if (r) return r;
|
---|
| 1124 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1125 | -(mod_tlen + (int )SIZE_OP_JUMP + (int )SIZE_OP_PUSH));
|
---|
| 1126 | }
|
---|
| 1127 | }
|
---|
| 1128 | else {
|
---|
| 1129 | r = add_opcode_rel_addr(reg, OP_JUMP, mod_tlen);
|
---|
| 1130 | if (r) return r;
|
---|
| 1131 | r = compile_tree_empty_check(qn->target, reg, empty_info);
|
---|
| 1132 | if (r) return r;
|
---|
| 1133 | r = add_opcode_rel_addr(reg, OP_PUSH, -(mod_tlen + (int )SIZE_OP_PUSH));
|
---|
| 1134 | }
|
---|
| 1135 | }
|
---|
| 1136 | else if (qn->upper == 0 && qn->is_refered != 0) { /* /(?<n>..){0}/ */
|
---|
| 1137 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 1138 | if (r) return r;
|
---|
| 1139 | r = compile_tree(qn->target, reg);
|
---|
| 1140 | }
|
---|
| 1141 | else if (!infinite && qn->greedy &&
|
---|
| 1142 | (qn->upper == 1 || (tlen + SIZE_OP_PUSH) * qn->upper
|
---|
| 1143 | <= QUANTIFIER_EXPAND_LIMIT_SIZE)) {
|
---|
| 1144 | int n = qn->upper - qn->lower;
|
---|
| 1145 |
|
---|
| 1146 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1147 | if (r) return r;
|
---|
| 1148 |
|
---|
| 1149 | for (i = 0; i < n; i++) {
|
---|
| 1150 | r = add_opcode_rel_addr(reg, OP_PUSH,
|
---|
| 1151 | (n - i) * tlen + (n - i - 1) * SIZE_OP_PUSH);
|
---|
| 1152 | if (r) return r;
|
---|
| 1153 | r = compile_tree(qn->target, reg);
|
---|
| 1154 | if (r) return r;
|
---|
| 1155 | }
|
---|
| 1156 | }
|
---|
| 1157 | else if (!qn->greedy && qn->upper == 1 && qn->lower == 0) { /* '??' */
|
---|
| 1158 | r = add_opcode_rel_addr(reg, OP_PUSH, SIZE_OP_JUMP);
|
---|
| 1159 | if (r) return r;
|
---|
| 1160 | r = add_opcode_rel_addr(reg, OP_JUMP, tlen);
|
---|
| 1161 | if (r) return r;
|
---|
| 1162 | r = compile_tree(qn->target, reg);
|
---|
| 1163 | }
|
---|
| 1164 | else {
|
---|
| 1165 | r = compile_range_repeat_node(qn, mod_tlen, empty_info, reg);
|
---|
| 1166 | }
|
---|
| 1167 | return r;
|
---|
| 1168 | }
|
---|
| 1169 | #endif /* USE_COMBINATION_EXPLOSION_CHECK */
|
---|
| 1170 |
|
---|
| 1171 | static int
|
---|
| 1172 | compile_length_option_node(EncloseNode* node, regex_t* reg)
|
---|
| 1173 | {
|
---|
| 1174 | int tlen;
|
---|
| 1175 | OnigOptionType prev = reg->options;
|
---|
| 1176 |
|
---|
| 1177 | reg->options = node->option;
|
---|
| 1178 | tlen = compile_length_tree(node->target, reg);
|
---|
| 1179 | reg->options = prev;
|
---|
| 1180 |
|
---|
| 1181 | if (tlen < 0) return tlen;
|
---|
| 1182 |
|
---|
| 1183 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
| 1184 | return SIZE_OP_SET_OPTION_PUSH + SIZE_OP_SET_OPTION + SIZE_OP_FAIL
|
---|
| 1185 | + tlen + SIZE_OP_SET_OPTION;
|
---|
| 1186 | }
|
---|
| 1187 | else
|
---|
| 1188 | return tlen;
|
---|
| 1189 | }
|
---|
| 1190 |
|
---|
| 1191 | static int
|
---|
| 1192 | compile_option_node(EncloseNode* node, regex_t* reg)
|
---|
| 1193 | {
|
---|
| 1194 | int r;
|
---|
| 1195 | OnigOptionType prev = reg->options;
|
---|
| 1196 |
|
---|
| 1197 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
| 1198 | r = add_opcode_option(reg, OP_SET_OPTION_PUSH, node->option);
|
---|
| 1199 | if (r) return r;
|
---|
| 1200 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
| 1201 | if (r) return r;
|
---|
| 1202 | r = add_opcode(reg, OP_FAIL);
|
---|
| 1203 | if (r) return r;
|
---|
| 1204 | }
|
---|
| 1205 |
|
---|
| 1206 | reg->options = node->option;
|
---|
| 1207 | r = compile_tree(node->target, reg);
|
---|
| 1208 | reg->options = prev;
|
---|
| 1209 |
|
---|
| 1210 | if (IS_DYNAMIC_OPTION(prev ^ node->option)) {
|
---|
| 1211 | if (r) return r;
|
---|
| 1212 | r = add_opcode_option(reg, OP_SET_OPTION, prev);
|
---|
| 1213 | }
|
---|
| 1214 | return r;
|
---|
| 1215 | }
|
---|
| 1216 |
|
---|
| 1217 | static int
|
---|
| 1218 | compile_length_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
| 1219 | {
|
---|
| 1220 | int len;
|
---|
| 1221 | int tlen;
|
---|
| 1222 |
|
---|
| 1223 | if (node->type == ENCLOSE_OPTION)
|
---|
| 1224 | return compile_length_option_node(node, reg);
|
---|
| 1225 |
|
---|
| 1226 | if (node->target) {
|
---|
| 1227 | tlen = compile_length_tree(node->target, reg);
|
---|
| 1228 | if (tlen < 0) return tlen;
|
---|
| 1229 | }
|
---|
| 1230 | else
|
---|
| 1231 | tlen = 0;
|
---|
| 1232 |
|
---|
| 1233 | switch (node->type) {
|
---|
| 1234 | case ENCLOSE_MEMORY:
|
---|
| 1235 | #ifdef USE_SUBEXP_CALL
|
---|
| 1236 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
| 1237 | len = SIZE_OP_MEMORY_START_PUSH + tlen
|
---|
| 1238 | + SIZE_OP_CALL + SIZE_OP_JUMP + SIZE_OP_RETURN;
|
---|
| 1239 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1240 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1241 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
| 1242 | else
|
---|
| 1243 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1244 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
| 1245 | }
|
---|
| 1246 | else
|
---|
| 1247 | #endif
|
---|
| 1248 | {
|
---|
| 1249 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
| 1250 | len = SIZE_OP_MEMORY_START_PUSH;
|
---|
| 1251 | else
|
---|
| 1252 | len = SIZE_OP_MEMORY_START;
|
---|
| 1253 |
|
---|
| 1254 | len += tlen + (BIT_STATUS_AT(reg->bt_mem_end, node->regnum)
|
---|
| 1255 | ? SIZE_OP_MEMORY_END_PUSH : SIZE_OP_MEMORY_END);
|
---|
| 1256 | }
|
---|
| 1257 | break;
|
---|
| 1258 |
|
---|
| 1259 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 1260 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
| 1261 | QtfrNode* qn = NQTFR(node->target);
|
---|
| 1262 | tlen = compile_length_tree(qn->target, reg);
|
---|
| 1263 | if (tlen < 0) return tlen;
|
---|
| 1264 |
|
---|
| 1265 | len = tlen * qn->lower
|
---|
| 1266 | + SIZE_OP_PUSH + tlen + SIZE_OP_POP + SIZE_OP_JUMP;
|
---|
| 1267 | }
|
---|
| 1268 | else {
|
---|
| 1269 | len = SIZE_OP_PUSH_STOP_BT + tlen + SIZE_OP_POP_STOP_BT;
|
---|
| 1270 | }
|
---|
| 1271 | break;
|
---|
| 1272 |
|
---|
| 1273 | case ENCLOSE_CONDITION:
|
---|
| 1274 | len = SIZE_OP_CONDITION;
|
---|
| 1275 | if (NTYPE(node->target) == NT_ALT) {
|
---|
| 1276 | Node* x = node->target;
|
---|
| 1277 |
|
---|
| 1278 | tlen = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
| 1279 | if (tlen < 0) return tlen;
|
---|
| 1280 | len += tlen + SIZE_OP_JUMP;
|
---|
| 1281 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
| 1282 | x = NCDR(x);
|
---|
| 1283 | tlen = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
| 1284 | if (tlen < 0) return tlen;
|
---|
| 1285 | len += tlen;
|
---|
| 1286 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
| 1287 | }
|
---|
| 1288 | else {
|
---|
| 1289 | return ONIGERR_PARSER_BUG;
|
---|
| 1290 | }
|
---|
| 1291 | break;
|
---|
| 1292 |
|
---|
| 1293 | default:
|
---|
| 1294 | return ONIGERR_TYPE_BUG;
|
---|
| 1295 | break;
|
---|
| 1296 | }
|
---|
| 1297 |
|
---|
| 1298 | return len;
|
---|
| 1299 | }
|
---|
| 1300 |
|
---|
| 1301 | static int get_char_length_tree(Node* node, regex_t* reg, int* len);
|
---|
| 1302 |
|
---|
| 1303 | static int
|
---|
| 1304 | compile_enclose_node(EncloseNode* node, regex_t* reg)
|
---|
| 1305 | {
|
---|
| 1306 | int r, len;
|
---|
| 1307 |
|
---|
| 1308 | if (node->type == ENCLOSE_OPTION)
|
---|
| 1309 | return compile_option_node(node, reg);
|
---|
| 1310 |
|
---|
| 1311 | switch (node->type) {
|
---|
| 1312 | case ENCLOSE_MEMORY:
|
---|
| 1313 | #ifdef USE_SUBEXP_CALL
|
---|
| 1314 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
| 1315 | r = add_opcode(reg, OP_CALL);
|
---|
| 1316 | if (r) return r;
|
---|
| 1317 | node->call_addr = BBUF_GET_OFFSET_POS(reg) + SIZE_ABSADDR + SIZE_OP_JUMP;
|
---|
| 1318 | node->state |= NST_ADDR_FIXED;
|
---|
| 1319 | r = add_abs_addr(reg, (int )node->call_addr);
|
---|
| 1320 | if (r) return r;
|
---|
| 1321 | len = compile_length_tree(node->target, reg);
|
---|
| 1322 | len += (SIZE_OP_MEMORY_START_PUSH + SIZE_OP_RETURN);
|
---|
| 1323 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1324 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1325 | ? SIZE_OP_MEMORY_END_PUSH_REC : SIZE_OP_MEMORY_END_PUSH);
|
---|
| 1326 | else
|
---|
| 1327 | len += (IS_ENCLOSE_RECURSION(node)
|
---|
| 1328 | ? SIZE_OP_MEMORY_END_REC : SIZE_OP_MEMORY_END);
|
---|
| 1329 |
|
---|
| 1330 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
| 1331 | if (r) return r;
|
---|
| 1332 | }
|
---|
| 1333 | #endif
|
---|
| 1334 | if (BIT_STATUS_AT(reg->bt_mem_start, node->regnum))
|
---|
| 1335 | r = add_opcode(reg, OP_MEMORY_START_PUSH);
|
---|
| 1336 | else
|
---|
| 1337 | r = add_opcode(reg, OP_MEMORY_START);
|
---|
| 1338 | if (r) return r;
|
---|
| 1339 | r = add_mem_num(reg, node->regnum);
|
---|
| 1340 | if (r) return r;
|
---|
| 1341 | r = compile_tree(node->target, reg);
|
---|
| 1342 | if (r) return r;
|
---|
| 1343 | #ifdef USE_SUBEXP_CALL
|
---|
| 1344 | if (IS_ENCLOSE_CALLED(node)) {
|
---|
| 1345 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1346 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
| 1347 | ? OP_MEMORY_END_PUSH_REC : OP_MEMORY_END_PUSH));
|
---|
| 1348 | else
|
---|
| 1349 | r = add_opcode(reg, (IS_ENCLOSE_RECURSION(node)
|
---|
| 1350 | ? OP_MEMORY_END_REC : OP_MEMORY_END));
|
---|
| 1351 |
|
---|
| 1352 | if (r) return r;
|
---|
| 1353 | r = add_mem_num(reg, node->regnum);
|
---|
| 1354 | if (r) return r;
|
---|
| 1355 | r = add_opcode(reg, OP_RETURN);
|
---|
| 1356 | }
|
---|
| 1357 | else
|
---|
| 1358 | #endif
|
---|
| 1359 | {
|
---|
| 1360 | if (BIT_STATUS_AT(reg->bt_mem_end, node->regnum))
|
---|
| 1361 | r = add_opcode(reg, OP_MEMORY_END_PUSH);
|
---|
| 1362 | else
|
---|
| 1363 | r = add_opcode(reg, OP_MEMORY_END);
|
---|
| 1364 | if (r) return r;
|
---|
| 1365 | r = add_mem_num(reg, node->regnum);
|
---|
| 1366 | }
|
---|
| 1367 | break;
|
---|
| 1368 |
|
---|
| 1369 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 1370 | if (IS_ENCLOSE_STOP_BT_SIMPLE_REPEAT(node)) {
|
---|
| 1371 | QtfrNode* qn = NQTFR(node->target);
|
---|
| 1372 | r = compile_tree_n_times(qn->target, qn->lower, reg);
|
---|
| 1373 | if (r) return r;
|
---|
| 1374 |
|
---|
| 1375 | len = compile_length_tree(qn->target, reg);
|
---|
| 1376 | if (len < 0) return len;
|
---|
| 1377 |
|
---|
| 1378 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_POP + SIZE_OP_JUMP);
|
---|
| 1379 | if (r) return r;
|
---|
| 1380 | r = compile_tree(qn->target, reg);
|
---|
| 1381 | if (r) return r;
|
---|
| 1382 | r = add_opcode(reg, OP_POP);
|
---|
| 1383 | if (r) return r;
|
---|
| 1384 | r = add_opcode_rel_addr(reg, OP_JUMP,
|
---|
| 1385 | -((int )SIZE_OP_PUSH + len + (int )SIZE_OP_POP + (int )SIZE_OP_JUMP));
|
---|
| 1386 | }
|
---|
| 1387 | else {
|
---|
| 1388 | r = add_opcode(reg, OP_PUSH_STOP_BT);
|
---|
| 1389 | if (r) return r;
|
---|
| 1390 | r = compile_tree(node->target, reg);
|
---|
| 1391 | if (r) return r;
|
---|
| 1392 | r = add_opcode(reg, OP_POP_STOP_BT);
|
---|
| 1393 | }
|
---|
| 1394 | break;
|
---|
| 1395 |
|
---|
| 1396 | case ENCLOSE_CONDITION:
|
---|
| 1397 | r = add_opcode(reg, OP_CONDITION);
|
---|
| 1398 | if (r) return r;
|
---|
| 1399 | r = add_mem_num(reg, node->regnum);
|
---|
| 1400 | if (r) return r;
|
---|
| 1401 |
|
---|
| 1402 | if (NTYPE(node->target) == NT_ALT) {
|
---|
| 1403 | Node* x = node->target;
|
---|
| 1404 | int len2;
|
---|
| 1405 |
|
---|
| 1406 | len = compile_length_tree(NCAR(x), reg); /* yes-node */
|
---|
| 1407 | if (len < 0) return len;
|
---|
| 1408 | if (NCDR(x) == NULL) return ONIGERR_PARSER_BUG;
|
---|
| 1409 | x = NCDR(x);
|
---|
| 1410 | len2 = compile_length_tree(NCAR(x), reg); /* no-node */
|
---|
| 1411 | if (len2 < 0) return len2;
|
---|
| 1412 | if (NCDR(x) != NULL) return ONIGERR_INVALID_CONDITION_PATTERN;
|
---|
| 1413 |
|
---|
| 1414 | x = node->target;
|
---|
| 1415 | r = add_rel_addr(reg, len + SIZE_OP_JUMP);
|
---|
| 1416 | if (r) return r;
|
---|
| 1417 | r = compile_tree(NCAR(x), reg); /* yes-node */
|
---|
| 1418 | if (r) return r;
|
---|
| 1419 | r = add_opcode_rel_addr(reg, OP_JUMP, len2);
|
---|
| 1420 | if (r) return r;
|
---|
| 1421 | x = NCDR(x);
|
---|
| 1422 | r = compile_tree(NCAR(x), reg); /* no-node */
|
---|
| 1423 | }
|
---|
| 1424 | else {
|
---|
| 1425 | return ONIGERR_PARSER_BUG;
|
---|
| 1426 | }
|
---|
| 1427 | break;
|
---|
| 1428 |
|
---|
| 1429 | default:
|
---|
| 1430 | return ONIGERR_TYPE_BUG;
|
---|
| 1431 | break;
|
---|
| 1432 | }
|
---|
| 1433 |
|
---|
| 1434 | return r;
|
---|
| 1435 | }
|
---|
| 1436 |
|
---|
| 1437 | static int
|
---|
| 1438 | compile_length_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
| 1439 | {
|
---|
| 1440 | int len;
|
---|
| 1441 | int tlen = 0;
|
---|
| 1442 |
|
---|
| 1443 | if (node->target) {
|
---|
| 1444 | tlen = compile_length_tree(node->target, reg);
|
---|
| 1445 | if (tlen < 0) return tlen;
|
---|
| 1446 | }
|
---|
| 1447 |
|
---|
| 1448 | switch (node->type) {
|
---|
| 1449 | case ANCHOR_PREC_READ:
|
---|
| 1450 | len = SIZE_OP_PUSH_POS + tlen + SIZE_OP_POP_POS;
|
---|
| 1451 | break;
|
---|
| 1452 | case ANCHOR_PREC_READ_NOT:
|
---|
| 1453 | len = SIZE_OP_PUSH_POS_NOT + tlen + SIZE_OP_FAIL_POS;
|
---|
| 1454 | break;
|
---|
| 1455 | case ANCHOR_LOOK_BEHIND:
|
---|
| 1456 | len = SIZE_OP_LOOK_BEHIND + tlen;
|
---|
| 1457 | break;
|
---|
| 1458 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 1459 | len = SIZE_OP_PUSH_LOOK_BEHIND_NOT + tlen + SIZE_OP_FAIL_LOOK_BEHIND_NOT;
|
---|
| 1460 | break;
|
---|
| 1461 |
|
---|
| 1462 | default:
|
---|
| 1463 | len = SIZE_OPCODE;
|
---|
| 1464 | break;
|
---|
| 1465 | }
|
---|
| 1466 |
|
---|
| 1467 | return len;
|
---|
| 1468 | }
|
---|
| 1469 |
|
---|
| 1470 | static int
|
---|
| 1471 | compile_anchor_node(AnchorNode* node, regex_t* reg)
|
---|
| 1472 | {
|
---|
| 1473 | int r, len;
|
---|
| 1474 |
|
---|
| 1475 | switch (node->type) {
|
---|
| 1476 | case ANCHOR_BEGIN_BUF: r = add_opcode(reg, OP_BEGIN_BUF); break;
|
---|
| 1477 | case ANCHOR_END_BUF: r = add_opcode(reg, OP_END_BUF); break;
|
---|
| 1478 | case ANCHOR_BEGIN_LINE: r = add_opcode(reg, OP_BEGIN_LINE); break;
|
---|
| 1479 | case ANCHOR_END_LINE: r = add_opcode(reg, OP_END_LINE); break;
|
---|
| 1480 | case ANCHOR_SEMI_END_BUF: r = add_opcode(reg, OP_SEMI_END_BUF); break;
|
---|
| 1481 | case ANCHOR_BEGIN_POSITION: r = add_opcode(reg, OP_BEGIN_POSITION); break;
|
---|
| 1482 |
|
---|
| 1483 | /* used for implicit anchor optimization: /.*a/ ==> /(?:^|\G).*a/ */
|
---|
| 1484 | case ANCHOR_ANYCHAR_STAR: r = add_opcode(reg, OP_BEGIN_POS_OR_LINE); break;
|
---|
| 1485 |
|
---|
| 1486 | case ANCHOR_WORD_BOUND:
|
---|
| 1487 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BOUND);
|
---|
| 1488 | else r = add_opcode(reg, OP_WORD_BOUND);
|
---|
| 1489 | break;
|
---|
| 1490 | case ANCHOR_NOT_WORD_BOUND:
|
---|
| 1491 | if (node->ascii_range) r = add_opcode(reg, OP_NOT_ASCII_WORD_BOUND);
|
---|
| 1492 | else r = add_opcode(reg, OP_NOT_WORD_BOUND);
|
---|
| 1493 | break;
|
---|
| 1494 | #ifdef USE_WORD_BEGIN_END
|
---|
| 1495 | case ANCHOR_WORD_BEGIN:
|
---|
| 1496 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_BEGIN);
|
---|
| 1497 | else r = add_opcode(reg, OP_WORD_BEGIN);
|
---|
| 1498 | break;
|
---|
| 1499 | case ANCHOR_WORD_END:
|
---|
| 1500 | if (node->ascii_range) r = add_opcode(reg, OP_ASCII_WORD_END);
|
---|
| 1501 | else r = add_opcode(reg, OP_WORD_END);
|
---|
| 1502 | break;
|
---|
| 1503 | #endif
|
---|
| 1504 | case ANCHOR_KEEP: r = add_opcode(reg, OP_KEEP); break;
|
---|
| 1505 |
|
---|
| 1506 | case ANCHOR_PREC_READ:
|
---|
| 1507 | r = add_opcode(reg, OP_PUSH_POS);
|
---|
| 1508 | if (r) return r;
|
---|
| 1509 | r = compile_tree(node->target, reg);
|
---|
| 1510 | if (r) return r;
|
---|
| 1511 | r = add_opcode(reg, OP_POP_POS);
|
---|
| 1512 | break;
|
---|
| 1513 |
|
---|
| 1514 | case ANCHOR_PREC_READ_NOT:
|
---|
| 1515 | len = compile_length_tree(node->target, reg);
|
---|
| 1516 | if (len < 0) return len;
|
---|
| 1517 | r = add_opcode_rel_addr(reg, OP_PUSH_POS_NOT, len + SIZE_OP_FAIL_POS);
|
---|
| 1518 | if (r) return r;
|
---|
| 1519 | r = compile_tree(node->target, reg);
|
---|
| 1520 | if (r) return r;
|
---|
| 1521 | r = add_opcode(reg, OP_FAIL_POS);
|
---|
| 1522 | break;
|
---|
| 1523 |
|
---|
| 1524 | case ANCHOR_LOOK_BEHIND:
|
---|
| 1525 | {
|
---|
| 1526 | int n;
|
---|
| 1527 | r = add_opcode(reg, OP_LOOK_BEHIND);
|
---|
| 1528 | if (r) return r;
|
---|
| 1529 | if (node->char_len < 0) {
|
---|
| 1530 | r = get_char_length_tree(node->target, reg, &n);
|
---|
| 1531 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 1532 | }
|
---|
| 1533 | else
|
---|
| 1534 | n = node->char_len;
|
---|
| 1535 | r = add_length(reg, n);
|
---|
| 1536 | if (r) return r;
|
---|
| 1537 | r = compile_tree(node->target, reg);
|
---|
| 1538 | }
|
---|
| 1539 | break;
|
---|
| 1540 |
|
---|
| 1541 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 1542 | {
|
---|
| 1543 | int n;
|
---|
| 1544 | len = compile_length_tree(node->target, reg);
|
---|
| 1545 | r = add_opcode_rel_addr(reg, OP_PUSH_LOOK_BEHIND_NOT,
|
---|
| 1546 | len + SIZE_OP_FAIL_LOOK_BEHIND_NOT);
|
---|
| 1547 | if (r) return r;
|
---|
| 1548 | if (node->char_len < 0) {
|
---|
| 1549 | r = get_char_length_tree(node->target, reg, &n);
|
---|
| 1550 | if (r) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 1551 | }
|
---|
| 1552 | else
|
---|
| 1553 | n = node->char_len;
|
---|
| 1554 | r = add_length(reg, n);
|
---|
| 1555 | if (r) return r;
|
---|
| 1556 | r = compile_tree(node->target, reg);
|
---|
| 1557 | if (r) return r;
|
---|
| 1558 | r = add_opcode(reg, OP_FAIL_LOOK_BEHIND_NOT);
|
---|
| 1559 | }
|
---|
| 1560 | break;
|
---|
| 1561 |
|
---|
| 1562 | default:
|
---|
| 1563 | return ONIGERR_TYPE_BUG;
|
---|
| 1564 | break;
|
---|
| 1565 | }
|
---|
| 1566 |
|
---|
| 1567 | return r;
|
---|
| 1568 | }
|
---|
| 1569 |
|
---|
| 1570 | static int
|
---|
| 1571 | compile_length_tree(Node* node, regex_t* reg)
|
---|
| 1572 | {
|
---|
| 1573 | int len, type, r;
|
---|
| 1574 |
|
---|
| 1575 | type = NTYPE(node);
|
---|
| 1576 | switch (type) {
|
---|
| 1577 | case NT_LIST:
|
---|
| 1578 | len = 0;
|
---|
| 1579 | do {
|
---|
| 1580 | r = compile_length_tree(NCAR(node), reg);
|
---|
| 1581 | if (r < 0) return r;
|
---|
| 1582 | len += r;
|
---|
| 1583 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1584 | r = len;
|
---|
| 1585 | break;
|
---|
| 1586 |
|
---|
| 1587 | case NT_ALT:
|
---|
| 1588 | {
|
---|
| 1589 | int n = 0;
|
---|
| 1590 | len = 0;
|
---|
| 1591 | do {
|
---|
| 1592 | r = compile_length_tree(NCAR(node), reg);
|
---|
| 1593 | if (r < 0) return r;
|
---|
| 1594 | len += r;
|
---|
| 1595 | n++;
|
---|
| 1596 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1597 | r = len;
|
---|
| 1598 | r += (SIZE_OP_PUSH + SIZE_OP_JUMP) * (n - 1);
|
---|
| 1599 | }
|
---|
| 1600 | break;
|
---|
| 1601 |
|
---|
| 1602 | case NT_STR:
|
---|
| 1603 | if (NSTRING_IS_RAW(node))
|
---|
| 1604 | r = compile_length_string_raw_node(NSTR(node), reg);
|
---|
| 1605 | else
|
---|
| 1606 | r = compile_length_string_node(node, reg);
|
---|
| 1607 | break;
|
---|
| 1608 |
|
---|
| 1609 | case NT_CCLASS:
|
---|
| 1610 | r = compile_length_cclass_node(NCCLASS(node), reg);
|
---|
| 1611 | break;
|
---|
| 1612 |
|
---|
| 1613 | case NT_CTYPE:
|
---|
| 1614 | case NT_CANY:
|
---|
| 1615 | r = SIZE_OPCODE;
|
---|
| 1616 | break;
|
---|
| 1617 |
|
---|
| 1618 | case NT_BREF:
|
---|
| 1619 | {
|
---|
| 1620 | BRefNode* br = NBREF(node);
|
---|
| 1621 |
|
---|
| 1622 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 1623 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
| 1624 | r = SIZE_OPCODE + SIZE_OPTION + SIZE_LENGTH +
|
---|
| 1625 | SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
| 1626 | }
|
---|
| 1627 | else
|
---|
| 1628 | #endif
|
---|
| 1629 | if (br->back_num == 1) {
|
---|
| 1630 | r = ((!IS_IGNORECASE(reg->options) && br->back_static[0] <= 2)
|
---|
| 1631 | ? SIZE_OPCODE : (SIZE_OPCODE + SIZE_MEMNUM));
|
---|
| 1632 | }
|
---|
| 1633 | else {
|
---|
| 1634 | r = SIZE_OPCODE + SIZE_LENGTH + (SIZE_MEMNUM * br->back_num);
|
---|
| 1635 | }
|
---|
| 1636 | }
|
---|
| 1637 | break;
|
---|
| 1638 |
|
---|
| 1639 | #ifdef USE_SUBEXP_CALL
|
---|
| 1640 | case NT_CALL:
|
---|
| 1641 | r = SIZE_OP_CALL;
|
---|
| 1642 | break;
|
---|
| 1643 | #endif
|
---|
| 1644 |
|
---|
| 1645 | case NT_QTFR:
|
---|
| 1646 | r = compile_length_quantifier_node(NQTFR(node), reg);
|
---|
| 1647 | break;
|
---|
| 1648 |
|
---|
| 1649 | case NT_ENCLOSE:
|
---|
| 1650 | r = compile_length_enclose_node(NENCLOSE(node), reg);
|
---|
| 1651 | break;
|
---|
| 1652 |
|
---|
| 1653 | case NT_ANCHOR:
|
---|
| 1654 | r = compile_length_anchor_node(NANCHOR(node), reg);
|
---|
| 1655 | break;
|
---|
| 1656 |
|
---|
| 1657 | default:
|
---|
| 1658 | return ONIGERR_TYPE_BUG;
|
---|
| 1659 | break;
|
---|
| 1660 | }
|
---|
| 1661 |
|
---|
| 1662 | return r;
|
---|
| 1663 | }
|
---|
| 1664 |
|
---|
| 1665 | static int
|
---|
| 1666 | compile_tree(Node* node, regex_t* reg)
|
---|
| 1667 | {
|
---|
| 1668 | int n, type, len, pos, r = 0;
|
---|
| 1669 |
|
---|
| 1670 | type = NTYPE(node);
|
---|
| 1671 | switch (type) {
|
---|
| 1672 | case NT_LIST:
|
---|
| 1673 | do {
|
---|
| 1674 | r = compile_tree(NCAR(node), reg);
|
---|
| 1675 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1676 | break;
|
---|
| 1677 |
|
---|
| 1678 | case NT_ALT:
|
---|
| 1679 | {
|
---|
| 1680 | Node* x = node;
|
---|
| 1681 | len = 0;
|
---|
| 1682 | do {
|
---|
| 1683 | len += compile_length_tree(NCAR(x), reg);
|
---|
| 1684 | if (NCDR(x) != NULL) {
|
---|
| 1685 | len += SIZE_OP_PUSH + SIZE_OP_JUMP;
|
---|
| 1686 | }
|
---|
| 1687 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
| 1688 | pos = reg->used + len; /* goal position */
|
---|
| 1689 |
|
---|
| 1690 | do {
|
---|
| 1691 | len = compile_length_tree(NCAR(node), reg);
|
---|
| 1692 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
| 1693 | r = add_opcode_rel_addr(reg, OP_PUSH, len + SIZE_OP_JUMP);
|
---|
| 1694 | if (r) break;
|
---|
| 1695 | }
|
---|
| 1696 | r = compile_tree(NCAR(node), reg);
|
---|
| 1697 | if (r) break;
|
---|
| 1698 | if (IS_NOT_NULL(NCDR(node))) {
|
---|
| 1699 | len = pos - (reg->used + SIZE_OP_JUMP);
|
---|
| 1700 | r = add_opcode_rel_addr(reg, OP_JUMP, len);
|
---|
| 1701 | if (r) break;
|
---|
| 1702 | }
|
---|
| 1703 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1704 | }
|
---|
| 1705 | break;
|
---|
| 1706 |
|
---|
| 1707 | case NT_STR:
|
---|
| 1708 | if (NSTRING_IS_RAW(node))
|
---|
| 1709 | r = compile_string_raw_node(NSTR(node), reg);
|
---|
| 1710 | else
|
---|
| 1711 | r = compile_string_node(node, reg);
|
---|
| 1712 | break;
|
---|
| 1713 |
|
---|
| 1714 | case NT_CCLASS:
|
---|
| 1715 | r = compile_cclass_node(NCCLASS(node), reg);
|
---|
| 1716 | break;
|
---|
| 1717 |
|
---|
| 1718 | case NT_CTYPE:
|
---|
| 1719 | {
|
---|
| 1720 | int op;
|
---|
| 1721 |
|
---|
| 1722 | switch (NCTYPE(node)->ctype) {
|
---|
| 1723 | case ONIGENC_CTYPE_WORD:
|
---|
| 1724 | if (NCTYPE(node)->ascii_range != 0) {
|
---|
| 1725 | if (NCTYPE(node)->not != 0) op = OP_NOT_ASCII_WORD;
|
---|
| 1726 | else op = OP_ASCII_WORD;
|
---|
| 1727 | }
|
---|
| 1728 | else {
|
---|
| 1729 | if (NCTYPE(node)->not != 0) op = OP_NOT_WORD;
|
---|
| 1730 | else op = OP_WORD;
|
---|
| 1731 | }
|
---|
| 1732 | break;
|
---|
| 1733 | default:
|
---|
| 1734 | return ONIGERR_TYPE_BUG;
|
---|
| 1735 | break;
|
---|
| 1736 | }
|
---|
| 1737 | r = add_opcode(reg, op);
|
---|
| 1738 | }
|
---|
| 1739 | break;
|
---|
| 1740 |
|
---|
| 1741 | case NT_CANY:
|
---|
| 1742 | if (IS_MULTILINE(reg->options))
|
---|
| 1743 | r = add_opcode(reg, OP_ANYCHAR_ML);
|
---|
| 1744 | else
|
---|
| 1745 | r = add_opcode(reg, OP_ANYCHAR);
|
---|
| 1746 | break;
|
---|
| 1747 |
|
---|
| 1748 | case NT_BREF:
|
---|
| 1749 | {
|
---|
| 1750 | BRefNode* br = NBREF(node);
|
---|
| 1751 |
|
---|
| 1752 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 1753 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
| 1754 | r = add_opcode(reg, OP_BACKREF_WITH_LEVEL);
|
---|
| 1755 | if (r) return r;
|
---|
| 1756 | r = add_option(reg, (reg->options & ONIG_OPTION_IGNORECASE));
|
---|
| 1757 | if (r) return r;
|
---|
| 1758 | r = add_length(reg, br->nest_level);
|
---|
| 1759 | if (r) return r;
|
---|
| 1760 |
|
---|
| 1761 | goto add_bacref_mems;
|
---|
| 1762 | }
|
---|
| 1763 | else
|
---|
| 1764 | #endif
|
---|
| 1765 | if (br->back_num == 1) {
|
---|
| 1766 | n = br->back_static[0];
|
---|
| 1767 | if (IS_IGNORECASE(reg->options)) {
|
---|
| 1768 | r = add_opcode(reg, OP_BACKREFN_IC);
|
---|
| 1769 | if (r) return r;
|
---|
| 1770 | r = add_mem_num(reg, n);
|
---|
| 1771 | }
|
---|
| 1772 | else {
|
---|
| 1773 | switch (n) {
|
---|
| 1774 | case 1: r = add_opcode(reg, OP_BACKREF1); break;
|
---|
| 1775 | case 2: r = add_opcode(reg, OP_BACKREF2); break;
|
---|
| 1776 | default:
|
---|
| 1777 | r = add_opcode(reg, OP_BACKREFN);
|
---|
| 1778 | if (r) return r;
|
---|
| 1779 | r = add_mem_num(reg, n);
|
---|
| 1780 | break;
|
---|
| 1781 | }
|
---|
| 1782 | }
|
---|
| 1783 | }
|
---|
| 1784 | else {
|
---|
| 1785 | int i;
|
---|
| 1786 | int* p;
|
---|
| 1787 |
|
---|
| 1788 | if (IS_IGNORECASE(reg->options)) {
|
---|
| 1789 | r = add_opcode(reg, OP_BACKREF_MULTI_IC);
|
---|
| 1790 | }
|
---|
| 1791 | else {
|
---|
| 1792 | r = add_opcode(reg, OP_BACKREF_MULTI);
|
---|
| 1793 | }
|
---|
| 1794 | if (r) return r;
|
---|
| 1795 |
|
---|
| 1796 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 1797 | add_bacref_mems:
|
---|
| 1798 | #endif
|
---|
| 1799 | r = add_length(reg, br->back_num);
|
---|
| 1800 | if (r) return r;
|
---|
| 1801 | p = BACKREFS_P(br);
|
---|
| 1802 | for (i = br->back_num - 1; i >= 0; i--) {
|
---|
| 1803 | r = add_mem_num(reg, p[i]);
|
---|
| 1804 | if (r) return r;
|
---|
| 1805 | }
|
---|
| 1806 | }
|
---|
| 1807 | }
|
---|
| 1808 | break;
|
---|
| 1809 |
|
---|
| 1810 | #ifdef USE_SUBEXP_CALL
|
---|
| 1811 | case NT_CALL:
|
---|
| 1812 | r = compile_call(NCALL(node), reg);
|
---|
| 1813 | break;
|
---|
| 1814 | #endif
|
---|
| 1815 |
|
---|
| 1816 | case NT_QTFR:
|
---|
| 1817 | r = compile_quantifier_node(NQTFR(node), reg);
|
---|
| 1818 | break;
|
---|
| 1819 |
|
---|
| 1820 | case NT_ENCLOSE:
|
---|
| 1821 | r = compile_enclose_node(NENCLOSE(node), reg);
|
---|
| 1822 | break;
|
---|
| 1823 |
|
---|
| 1824 | case NT_ANCHOR:
|
---|
| 1825 | r = compile_anchor_node(NANCHOR(node), reg);
|
---|
| 1826 | break;
|
---|
| 1827 |
|
---|
| 1828 | default:
|
---|
| 1829 | #ifdef ONIG_DEBUG
|
---|
| 1830 | fprintf(stderr, "compile_tree: undefined node type %d\n", NTYPE(node));
|
---|
| 1831 | #endif
|
---|
| 1832 | break;
|
---|
| 1833 | }
|
---|
| 1834 |
|
---|
| 1835 | return r;
|
---|
| 1836 | }
|
---|
| 1837 |
|
---|
| 1838 | #ifdef USE_NAMED_GROUP
|
---|
| 1839 |
|
---|
| 1840 | static int
|
---|
| 1841 | noname_disable_map(Node** plink, GroupNumRemap* map, int* counter)
|
---|
| 1842 | {
|
---|
| 1843 | int r = 0;
|
---|
| 1844 | Node* node = *plink;
|
---|
| 1845 |
|
---|
| 1846 | switch (NTYPE(node)) {
|
---|
| 1847 | case NT_LIST:
|
---|
| 1848 | case NT_ALT:
|
---|
| 1849 | do {
|
---|
| 1850 | r = noname_disable_map(&(NCAR(node)), map, counter);
|
---|
| 1851 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1852 | break;
|
---|
| 1853 |
|
---|
| 1854 | case NT_QTFR:
|
---|
| 1855 | {
|
---|
| 1856 | Node** ptarget = &(NQTFR(node)->target);
|
---|
| 1857 | Node* old = *ptarget;
|
---|
| 1858 | r = noname_disable_map(ptarget, map, counter);
|
---|
| 1859 | if (*ptarget != old && NTYPE(*ptarget) == NT_QTFR) {
|
---|
| 1860 | onig_reduce_nested_quantifier(node, *ptarget);
|
---|
| 1861 | }
|
---|
| 1862 | }
|
---|
| 1863 | break;
|
---|
| 1864 |
|
---|
| 1865 | case NT_ENCLOSE:
|
---|
| 1866 | {
|
---|
| 1867 | EncloseNode* en = NENCLOSE(node);
|
---|
| 1868 | if (en->type == ENCLOSE_MEMORY) {
|
---|
| 1869 | if (IS_ENCLOSE_NAMED_GROUP(en)) {
|
---|
| 1870 | (*counter)++;
|
---|
| 1871 | map[en->regnum].new_val = *counter;
|
---|
| 1872 | en->regnum = *counter;
|
---|
| 1873 | }
|
---|
| 1874 | else if (en->regnum != 0) {
|
---|
| 1875 | *plink = en->target;
|
---|
| 1876 | en->target = NULL_NODE;
|
---|
| 1877 | onig_node_free(node);
|
---|
| 1878 | r = noname_disable_map(plink, map, counter);
|
---|
| 1879 | break;
|
---|
| 1880 | }
|
---|
| 1881 | }
|
---|
| 1882 | r = noname_disable_map(&(en->target), map, counter);
|
---|
| 1883 | }
|
---|
| 1884 | break;
|
---|
| 1885 |
|
---|
| 1886 | case NT_ANCHOR:
|
---|
| 1887 | {
|
---|
| 1888 | AnchorNode* an = NANCHOR(node);
|
---|
| 1889 | switch (an->type) {
|
---|
| 1890 | case ANCHOR_PREC_READ:
|
---|
| 1891 | case ANCHOR_PREC_READ_NOT:
|
---|
| 1892 | case ANCHOR_LOOK_BEHIND:
|
---|
| 1893 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 1894 | r = noname_disable_map(&(an->target), map, counter);
|
---|
| 1895 | break;
|
---|
| 1896 | }
|
---|
| 1897 | }
|
---|
| 1898 | break;
|
---|
| 1899 |
|
---|
| 1900 | default:
|
---|
| 1901 | break;
|
---|
| 1902 | }
|
---|
| 1903 |
|
---|
| 1904 | return r;
|
---|
| 1905 | }
|
---|
| 1906 |
|
---|
| 1907 | static int
|
---|
| 1908 | renumber_node_backref(Node* node, GroupNumRemap* map)
|
---|
| 1909 | {
|
---|
| 1910 | int i, pos, n, old_num;
|
---|
| 1911 | int *backs;
|
---|
| 1912 | BRefNode* bn = NBREF(node);
|
---|
| 1913 |
|
---|
| 1914 | if (! IS_BACKREF_NAME_REF(bn))
|
---|
| 1915 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
| 1916 |
|
---|
| 1917 | old_num = bn->back_num;
|
---|
| 1918 | if (IS_NULL(bn->back_dynamic))
|
---|
| 1919 | backs = bn->back_static;
|
---|
| 1920 | else
|
---|
| 1921 | backs = bn->back_dynamic;
|
---|
| 1922 |
|
---|
| 1923 | for (i = 0, pos = 0; i < old_num; i++) {
|
---|
| 1924 | n = map[backs[i]].new_val;
|
---|
| 1925 | if (n > 0) {
|
---|
| 1926 | backs[pos] = n;
|
---|
| 1927 | pos++;
|
---|
| 1928 | }
|
---|
| 1929 | }
|
---|
| 1930 |
|
---|
| 1931 | bn->back_num = pos;
|
---|
| 1932 | return 0;
|
---|
| 1933 | }
|
---|
| 1934 |
|
---|
| 1935 | static int
|
---|
| 1936 | renumber_by_map(Node* node, GroupNumRemap* map)
|
---|
| 1937 | {
|
---|
| 1938 | int r = 0;
|
---|
| 1939 |
|
---|
| 1940 | switch (NTYPE(node)) {
|
---|
| 1941 | case NT_LIST:
|
---|
| 1942 | case NT_ALT:
|
---|
| 1943 | do {
|
---|
| 1944 | r = renumber_by_map(NCAR(node), map);
|
---|
| 1945 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1946 | break;
|
---|
| 1947 | case NT_QTFR:
|
---|
| 1948 | r = renumber_by_map(NQTFR(node)->target, map);
|
---|
| 1949 | break;
|
---|
| 1950 | case NT_ENCLOSE:
|
---|
| 1951 | {
|
---|
| 1952 | EncloseNode* en = NENCLOSE(node);
|
---|
| 1953 | if (en->type == ENCLOSE_CONDITION)
|
---|
| 1954 | en->regnum = map[en->regnum].new_val;
|
---|
| 1955 | r = renumber_by_map(en->target, map);
|
---|
| 1956 | }
|
---|
| 1957 | break;
|
---|
| 1958 |
|
---|
| 1959 | case NT_BREF:
|
---|
| 1960 | r = renumber_node_backref(node, map);
|
---|
| 1961 | break;
|
---|
| 1962 |
|
---|
| 1963 | case NT_ANCHOR:
|
---|
| 1964 | {
|
---|
| 1965 | AnchorNode* an = NANCHOR(node);
|
---|
| 1966 | switch (an->type) {
|
---|
| 1967 | case ANCHOR_PREC_READ:
|
---|
| 1968 | case ANCHOR_PREC_READ_NOT:
|
---|
| 1969 | case ANCHOR_LOOK_BEHIND:
|
---|
| 1970 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 1971 | r = renumber_by_map(an->target, map);
|
---|
| 1972 | break;
|
---|
| 1973 | }
|
---|
| 1974 | }
|
---|
| 1975 | break;
|
---|
| 1976 |
|
---|
| 1977 | default:
|
---|
| 1978 | break;
|
---|
| 1979 | }
|
---|
| 1980 |
|
---|
| 1981 | return r;
|
---|
| 1982 | }
|
---|
| 1983 |
|
---|
| 1984 | static int
|
---|
| 1985 | numbered_ref_check(Node* node)
|
---|
| 1986 | {
|
---|
| 1987 | int r = 0;
|
---|
| 1988 |
|
---|
| 1989 | switch (NTYPE(node)) {
|
---|
| 1990 | case NT_LIST:
|
---|
| 1991 | case NT_ALT:
|
---|
| 1992 | do {
|
---|
| 1993 | r = numbered_ref_check(NCAR(node));
|
---|
| 1994 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 1995 | break;
|
---|
| 1996 | case NT_QTFR:
|
---|
| 1997 | r = numbered_ref_check(NQTFR(node)->target);
|
---|
| 1998 | break;
|
---|
| 1999 | case NT_ENCLOSE:
|
---|
| 2000 | r = numbered_ref_check(NENCLOSE(node)->target);
|
---|
| 2001 | break;
|
---|
| 2002 |
|
---|
| 2003 | case NT_BREF:
|
---|
| 2004 | if (! IS_BACKREF_NAME_REF(NBREF(node)))
|
---|
| 2005 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
| 2006 | break;
|
---|
| 2007 |
|
---|
| 2008 | default:
|
---|
| 2009 | break;
|
---|
| 2010 | }
|
---|
| 2011 |
|
---|
| 2012 | return r;
|
---|
| 2013 | }
|
---|
| 2014 |
|
---|
| 2015 | static int
|
---|
| 2016 | disable_noname_group_capture_(Node** root, regex_t* reg, ScanEnv* env, GroupNumRemap* map)
|
---|
| 2017 | {
|
---|
| 2018 | int r, i, pos, counter;
|
---|
| 2019 | BitStatusType loc;
|
---|
| 2020 |
|
---|
| 2021 | CHECK_NULL_RETURN_MEMERR(map);
|
---|
| 2022 | for (i = 1; i <= env->num_mem; i++) {
|
---|
| 2023 | map[i].new_val = 0;
|
---|
| 2024 | }
|
---|
| 2025 | counter = 0;
|
---|
| 2026 | r = noname_disable_map(root, map, &counter);
|
---|
| 2027 | if (r != 0) return r;
|
---|
| 2028 |
|
---|
| 2029 | r = renumber_by_map(*root, map);
|
---|
| 2030 | if (r != 0) return r;
|
---|
| 2031 |
|
---|
| 2032 | for (i = 1, pos = 1; i <= env->num_mem; i++) {
|
---|
| 2033 | if (map[i].new_val > 0) {
|
---|
| 2034 | SCANENV_MEM_NODES(env)[pos] = SCANENV_MEM_NODES(env)[i];
|
---|
| 2035 | pos++;
|
---|
| 2036 | }
|
---|
| 2037 | }
|
---|
| 2038 |
|
---|
| 2039 | loc = env->capture_history;
|
---|
| 2040 | BIT_STATUS_CLEAR(env->capture_history);
|
---|
| 2041 | for (i = 1; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
|
---|
| 2042 | if (BIT_STATUS_AT(loc, i)) {
|
---|
| 2043 | BIT_STATUS_ON_AT_SIMPLE(env->capture_history, map[i].new_val);
|
---|
| 2044 | }
|
---|
| 2045 | }
|
---|
| 2046 |
|
---|
| 2047 | env->num_mem = env->num_named;
|
---|
| 2048 | reg->num_mem = env->num_named;
|
---|
| 2049 |
|
---|
| 2050 | return onig_renumber_name_table(reg, map);
|
---|
| 2051 | }
|
---|
| 2052 |
|
---|
| 2053 | static int
|
---|
| 2054 | disable_noname_group_capture(Node** root, regex_t* reg, ScanEnv* env)
|
---|
| 2055 | {
|
---|
| 2056 | GroupNumRemap* map = (GroupNumRemap*)xmalloc(sizeof(GroupNumRemap) * (env->num_mem + 1));
|
---|
| 2057 | int ret = disable_noname_group_capture_(root, reg, env, map);
|
---|
| 2058 | xfree(map);
|
---|
| 2059 | return ret;
|
---|
| 2060 | }
|
---|
| 2061 | #endif /* USE_NAMED_GROUP */
|
---|
| 2062 |
|
---|
| 2063 | #ifdef USE_SUBEXP_CALL
|
---|
| 2064 | static int
|
---|
| 2065 | unset_addr_list_fix(UnsetAddrList* uslist, regex_t* reg)
|
---|
| 2066 | {
|
---|
| 2067 | int i, offset;
|
---|
| 2068 | EncloseNode* en;
|
---|
| 2069 | AbsAddrType addr;
|
---|
| 2070 |
|
---|
| 2071 | for (i = 0; i < uslist->num; i++) {
|
---|
| 2072 | en = NENCLOSE(uslist->us[i].target);
|
---|
| 2073 | if (! IS_ENCLOSE_ADDR_FIXED(en)) return ONIGERR_PARSER_BUG;
|
---|
| 2074 | addr = en->call_addr;
|
---|
| 2075 | offset = uslist->us[i].offset;
|
---|
| 2076 |
|
---|
| 2077 | BBUF_WRITE(reg, offset, &addr, SIZE_ABSADDR);
|
---|
| 2078 | }
|
---|
| 2079 | return 0;
|
---|
| 2080 | }
|
---|
| 2081 | #endif
|
---|
| 2082 |
|
---|
| 2083 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
| 2084 | static int
|
---|
| 2085 | quantifiers_memory_node_info(Node* node)
|
---|
| 2086 | {
|
---|
| 2087 | int r = 0;
|
---|
| 2088 |
|
---|
| 2089 | switch (NTYPE(node)) {
|
---|
| 2090 | case NT_LIST:
|
---|
| 2091 | case NT_ALT:
|
---|
| 2092 | {
|
---|
| 2093 | int v;
|
---|
| 2094 | do {
|
---|
| 2095 | v = quantifiers_memory_node_info(NCAR(node));
|
---|
| 2096 | if (v > r) r = v;
|
---|
| 2097 | } while (v >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2098 | }
|
---|
| 2099 | break;
|
---|
| 2100 |
|
---|
| 2101 | #ifdef USE_SUBEXP_CALL
|
---|
| 2102 | case NT_CALL:
|
---|
| 2103 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
| 2104 | return NQ_TARGET_IS_EMPTY_REC; /* tiny version */
|
---|
| 2105 | }
|
---|
| 2106 | else
|
---|
| 2107 | r = quantifiers_memory_node_info(NCALL(node)->target);
|
---|
| 2108 | break;
|
---|
| 2109 | #endif
|
---|
| 2110 |
|
---|
| 2111 | case NT_QTFR:
|
---|
| 2112 | {
|
---|
| 2113 | QtfrNode* qn = NQTFR(node);
|
---|
| 2114 | if (qn->upper != 0) {
|
---|
| 2115 | r = quantifiers_memory_node_info(qn->target);
|
---|
| 2116 | }
|
---|
| 2117 | }
|
---|
| 2118 | break;
|
---|
| 2119 |
|
---|
| 2120 | case NT_ENCLOSE:
|
---|
| 2121 | {
|
---|
| 2122 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2123 | switch (en->type) {
|
---|
| 2124 | case ENCLOSE_MEMORY:
|
---|
| 2125 | return NQ_TARGET_IS_EMPTY_MEM;
|
---|
| 2126 | break;
|
---|
| 2127 |
|
---|
| 2128 | case ENCLOSE_OPTION:
|
---|
| 2129 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2130 | case ENCLOSE_CONDITION:
|
---|
| 2131 | r = quantifiers_memory_node_info(en->target);
|
---|
| 2132 | break;
|
---|
| 2133 | default:
|
---|
| 2134 | break;
|
---|
| 2135 | }
|
---|
| 2136 | }
|
---|
| 2137 | break;
|
---|
| 2138 |
|
---|
| 2139 | case NT_BREF:
|
---|
| 2140 | case NT_STR:
|
---|
| 2141 | case NT_CTYPE:
|
---|
| 2142 | case NT_CCLASS:
|
---|
| 2143 | case NT_CANY:
|
---|
| 2144 | case NT_ANCHOR:
|
---|
| 2145 | default:
|
---|
| 2146 | break;
|
---|
| 2147 | }
|
---|
| 2148 |
|
---|
| 2149 | return r;
|
---|
| 2150 | }
|
---|
| 2151 | #endif /* USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT */
|
---|
| 2152 |
|
---|
| 2153 | static int
|
---|
| 2154 | get_min_match_length(Node* node, OnigDistance *min, ScanEnv* env)
|
---|
| 2155 | {
|
---|
| 2156 | OnigDistance tmin;
|
---|
| 2157 | int r = 0;
|
---|
| 2158 |
|
---|
| 2159 | *min = 0;
|
---|
| 2160 | switch (NTYPE(node)) {
|
---|
| 2161 | case NT_BREF:
|
---|
| 2162 | {
|
---|
| 2163 | int i;
|
---|
| 2164 | int* backs;
|
---|
| 2165 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
| 2166 | BRefNode* br = NBREF(node);
|
---|
| 2167 | if (br->state & NST_RECURSION) break;
|
---|
| 2168 |
|
---|
| 2169 | backs = BACKREFS_P(br);
|
---|
| 2170 | if (backs[0] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 2171 | r = get_min_match_length(nodes[backs[0]], min, env);
|
---|
| 2172 | if (r != 0) break;
|
---|
| 2173 | for (i = 1; i < br->back_num; i++) {
|
---|
| 2174 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 2175 | r = get_min_match_length(nodes[backs[i]], &tmin, env);
|
---|
| 2176 | if (r != 0) break;
|
---|
| 2177 | if (*min > tmin) *min = tmin;
|
---|
| 2178 | }
|
---|
| 2179 | }
|
---|
| 2180 | break;
|
---|
| 2181 |
|
---|
| 2182 | #ifdef USE_SUBEXP_CALL
|
---|
| 2183 | case NT_CALL:
|
---|
| 2184 | if (IS_CALL_RECURSION(NCALL(node))) {
|
---|
| 2185 | EncloseNode* en = NENCLOSE(NCALL(node)->target);
|
---|
| 2186 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
| 2187 | *min = en->min_len;
|
---|
| 2188 | }
|
---|
| 2189 | else
|
---|
| 2190 | r = get_min_match_length(NCALL(node)->target, min, env);
|
---|
| 2191 | break;
|
---|
| 2192 | #endif
|
---|
| 2193 |
|
---|
| 2194 | case NT_LIST:
|
---|
| 2195 | do {
|
---|
| 2196 | r = get_min_match_length(NCAR(node), &tmin, env);
|
---|
| 2197 | if (r == 0) *min += tmin;
|
---|
| 2198 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2199 | break;
|
---|
| 2200 |
|
---|
| 2201 | case NT_ALT:
|
---|
| 2202 | {
|
---|
| 2203 | Node *x, *y;
|
---|
| 2204 | y = node;
|
---|
| 2205 | do {
|
---|
| 2206 | x = NCAR(y);
|
---|
| 2207 | r = get_min_match_length(x, &tmin, env);
|
---|
| 2208 | if (r != 0) break;
|
---|
| 2209 | if (y == node) *min = tmin;
|
---|
| 2210 | else if (*min > tmin) *min = tmin;
|
---|
| 2211 | } while (r == 0 && IS_NOT_NULL(y = NCDR(y)));
|
---|
| 2212 | }
|
---|
| 2213 | break;
|
---|
| 2214 |
|
---|
| 2215 | case NT_STR:
|
---|
| 2216 | {
|
---|
| 2217 | StrNode* sn = NSTR(node);
|
---|
| 2218 | *min = sn->end - sn->s;
|
---|
| 2219 | }
|
---|
| 2220 | break;
|
---|
| 2221 |
|
---|
| 2222 | case NT_CTYPE:
|
---|
| 2223 | *min = 1;
|
---|
| 2224 | break;
|
---|
| 2225 |
|
---|
| 2226 | case NT_CCLASS:
|
---|
| 2227 | case NT_CANY:
|
---|
| 2228 | *min = 1;
|
---|
| 2229 | break;
|
---|
| 2230 |
|
---|
| 2231 | case NT_QTFR:
|
---|
| 2232 | {
|
---|
| 2233 | QtfrNode* qn = NQTFR(node);
|
---|
| 2234 |
|
---|
| 2235 | if (qn->lower > 0) {
|
---|
| 2236 | r = get_min_match_length(qn->target, min, env);
|
---|
| 2237 | if (r == 0)
|
---|
| 2238 | *min = distance_multiply(*min, qn->lower);
|
---|
| 2239 | }
|
---|
| 2240 | }
|
---|
| 2241 | break;
|
---|
| 2242 |
|
---|
| 2243 | case NT_ENCLOSE:
|
---|
| 2244 | {
|
---|
| 2245 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2246 | switch (en->type) {
|
---|
| 2247 | case ENCLOSE_MEMORY:
|
---|
| 2248 | #ifdef USE_SUBEXP_CALL
|
---|
| 2249 | if (IS_ENCLOSE_MIN_FIXED(en))
|
---|
| 2250 | *min = en->min_len;
|
---|
| 2251 | else {
|
---|
| 2252 | r = get_min_match_length(en->target, min, env);
|
---|
| 2253 | if (r == 0) {
|
---|
| 2254 | en->min_len = *min;
|
---|
| 2255 | SET_ENCLOSE_STATUS(node, NST_MIN_FIXED);
|
---|
| 2256 | }
|
---|
| 2257 | }
|
---|
| 2258 | break;
|
---|
| 2259 | #endif
|
---|
| 2260 | case ENCLOSE_OPTION:
|
---|
| 2261 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2262 | case ENCLOSE_CONDITION:
|
---|
| 2263 | r = get_min_match_length(en->target, min, env);
|
---|
| 2264 | break;
|
---|
| 2265 | }
|
---|
| 2266 | }
|
---|
| 2267 | break;
|
---|
| 2268 |
|
---|
| 2269 | case NT_ANCHOR:
|
---|
| 2270 | default:
|
---|
| 2271 | break;
|
---|
| 2272 | }
|
---|
| 2273 |
|
---|
| 2274 | return r;
|
---|
| 2275 | }
|
---|
| 2276 |
|
---|
| 2277 | static int
|
---|
| 2278 | get_max_match_length(Node* node, OnigDistance *max, ScanEnv* env)
|
---|
| 2279 | {
|
---|
| 2280 | OnigDistance tmax;
|
---|
| 2281 | int r = 0;
|
---|
| 2282 |
|
---|
| 2283 | *max = 0;
|
---|
| 2284 | switch (NTYPE(node)) {
|
---|
| 2285 | case NT_LIST:
|
---|
| 2286 | do {
|
---|
| 2287 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
| 2288 | if (r == 0)
|
---|
| 2289 | *max = distance_add(*max, tmax);
|
---|
| 2290 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2291 | break;
|
---|
| 2292 |
|
---|
| 2293 | case NT_ALT:
|
---|
| 2294 | do {
|
---|
| 2295 | r = get_max_match_length(NCAR(node), &tmax, env);
|
---|
| 2296 | if (r == 0 && *max < tmax) *max = tmax;
|
---|
| 2297 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2298 | break;
|
---|
| 2299 |
|
---|
| 2300 | case NT_STR:
|
---|
| 2301 | {
|
---|
| 2302 | StrNode* sn = NSTR(node);
|
---|
| 2303 | *max = sn->end - sn->s;
|
---|
| 2304 | }
|
---|
| 2305 | break;
|
---|
| 2306 |
|
---|
| 2307 | case NT_CTYPE:
|
---|
| 2308 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 2309 | break;
|
---|
| 2310 |
|
---|
| 2311 | case NT_CCLASS:
|
---|
| 2312 | case NT_CANY:
|
---|
| 2313 | *max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 2314 | break;
|
---|
| 2315 |
|
---|
| 2316 | case NT_BREF:
|
---|
| 2317 | {
|
---|
| 2318 | int i;
|
---|
| 2319 | int* backs;
|
---|
| 2320 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
| 2321 | BRefNode* br = NBREF(node);
|
---|
| 2322 | if (br->state & NST_RECURSION) {
|
---|
| 2323 | *max = ONIG_INFINITE_DISTANCE;
|
---|
| 2324 | break;
|
---|
| 2325 | }
|
---|
| 2326 | backs = BACKREFS_P(br);
|
---|
| 2327 | for (i = 0; i < br->back_num; i++) {
|
---|
| 2328 | if (backs[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 2329 | r = get_max_match_length(nodes[backs[i]], &tmax, env);
|
---|
| 2330 | if (r != 0) break;
|
---|
| 2331 | if (*max < tmax) *max = tmax;
|
---|
| 2332 | }
|
---|
| 2333 | }
|
---|
| 2334 | break;
|
---|
| 2335 |
|
---|
| 2336 | #ifdef USE_SUBEXP_CALL
|
---|
| 2337 | case NT_CALL:
|
---|
| 2338 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
| 2339 | r = get_max_match_length(NCALL(node)->target, max, env);
|
---|
| 2340 | else
|
---|
| 2341 | *max = ONIG_INFINITE_DISTANCE;
|
---|
| 2342 | break;
|
---|
| 2343 | #endif
|
---|
| 2344 |
|
---|
| 2345 | case NT_QTFR:
|
---|
| 2346 | {
|
---|
| 2347 | QtfrNode* qn = NQTFR(node);
|
---|
| 2348 |
|
---|
| 2349 | if (qn->upper != 0) {
|
---|
| 2350 | r = get_max_match_length(qn->target, max, env);
|
---|
| 2351 | if (r == 0 && *max != 0) {
|
---|
| 2352 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
| 2353 | *max = distance_multiply(*max, qn->upper);
|
---|
| 2354 | else
|
---|
| 2355 | *max = ONIG_INFINITE_DISTANCE;
|
---|
| 2356 | }
|
---|
| 2357 | }
|
---|
| 2358 | }
|
---|
| 2359 | break;
|
---|
| 2360 |
|
---|
| 2361 | case NT_ENCLOSE:
|
---|
| 2362 | {
|
---|
| 2363 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2364 | switch (en->type) {
|
---|
| 2365 | case ENCLOSE_MEMORY:
|
---|
| 2366 | #ifdef USE_SUBEXP_CALL
|
---|
| 2367 | if (IS_ENCLOSE_MAX_FIXED(en))
|
---|
| 2368 | *max = en->max_len;
|
---|
| 2369 | else {
|
---|
| 2370 | r = get_max_match_length(en->target, max, env);
|
---|
| 2371 | if (r == 0) {
|
---|
| 2372 | en->max_len = *max;
|
---|
| 2373 | SET_ENCLOSE_STATUS(node, NST_MAX_FIXED);
|
---|
| 2374 | }
|
---|
| 2375 | }
|
---|
| 2376 | break;
|
---|
| 2377 | #endif
|
---|
| 2378 | case ENCLOSE_OPTION:
|
---|
| 2379 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2380 | case ENCLOSE_CONDITION:
|
---|
| 2381 | r = get_max_match_length(en->target, max, env);
|
---|
| 2382 | break;
|
---|
| 2383 | }
|
---|
| 2384 | }
|
---|
| 2385 | break;
|
---|
| 2386 |
|
---|
| 2387 | case NT_ANCHOR:
|
---|
| 2388 | default:
|
---|
| 2389 | break;
|
---|
| 2390 | }
|
---|
| 2391 |
|
---|
| 2392 | return r;
|
---|
| 2393 | }
|
---|
| 2394 |
|
---|
| 2395 | #define GET_CHAR_LEN_VARLEN -1
|
---|
| 2396 | #define GET_CHAR_LEN_TOP_ALT_VARLEN -2
|
---|
| 2397 |
|
---|
| 2398 | /* fixed size pattern node only */
|
---|
| 2399 | static int
|
---|
| 2400 | get_char_length_tree1(Node* node, regex_t* reg, int* len, int level)
|
---|
| 2401 | {
|
---|
| 2402 | int tlen;
|
---|
| 2403 | int r = 0;
|
---|
| 2404 |
|
---|
| 2405 | level++;
|
---|
| 2406 | *len = 0;
|
---|
| 2407 | switch (NTYPE(node)) {
|
---|
| 2408 | case NT_LIST:
|
---|
| 2409 | do {
|
---|
| 2410 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
| 2411 | if (r == 0)
|
---|
| 2412 | *len = (int )distance_add(*len, tlen);
|
---|
| 2413 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2414 | break;
|
---|
| 2415 |
|
---|
| 2416 | case NT_ALT:
|
---|
| 2417 | {
|
---|
| 2418 | int tlen2;
|
---|
| 2419 | int varlen = 0;
|
---|
| 2420 |
|
---|
| 2421 | r = get_char_length_tree1(NCAR(node), reg, &tlen, level);
|
---|
| 2422 | while (r == 0 && IS_NOT_NULL(node = NCDR(node))) {
|
---|
| 2423 | r = get_char_length_tree1(NCAR(node), reg, &tlen2, level);
|
---|
| 2424 | if (r == 0) {
|
---|
| 2425 | if (tlen != tlen2)
|
---|
| 2426 | varlen = 1;
|
---|
| 2427 | }
|
---|
| 2428 | }
|
---|
| 2429 | if (r == 0) {
|
---|
| 2430 | if (varlen != 0) {
|
---|
| 2431 | if (level == 1)
|
---|
| 2432 | r = GET_CHAR_LEN_TOP_ALT_VARLEN;
|
---|
| 2433 | else
|
---|
| 2434 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2435 | }
|
---|
| 2436 | else
|
---|
| 2437 | *len = tlen;
|
---|
| 2438 | }
|
---|
| 2439 | }
|
---|
| 2440 | break;
|
---|
| 2441 |
|
---|
| 2442 | case NT_STR:
|
---|
| 2443 | {
|
---|
| 2444 | StrNode* sn = NSTR(node);
|
---|
| 2445 | UChar *s = sn->s;
|
---|
| 2446 | while (s < sn->end) {
|
---|
| 2447 | s += enclen(reg->enc, s);
|
---|
| 2448 | (*len)++;
|
---|
| 2449 | }
|
---|
| 2450 | }
|
---|
| 2451 | break;
|
---|
| 2452 |
|
---|
| 2453 | case NT_QTFR:
|
---|
| 2454 | {
|
---|
| 2455 | QtfrNode* qn = NQTFR(node);
|
---|
| 2456 | if (qn->lower == qn->upper) {
|
---|
| 2457 | r = get_char_length_tree1(qn->target, reg, &tlen, level);
|
---|
| 2458 | if (r == 0)
|
---|
| 2459 | *len = (int )distance_multiply(tlen, qn->lower);
|
---|
| 2460 | }
|
---|
| 2461 | else
|
---|
| 2462 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2463 | }
|
---|
| 2464 | break;
|
---|
| 2465 |
|
---|
| 2466 | #ifdef USE_SUBEXP_CALL
|
---|
| 2467 | case NT_CALL:
|
---|
| 2468 | if (! IS_CALL_RECURSION(NCALL(node)))
|
---|
| 2469 | r = get_char_length_tree1(NCALL(node)->target, reg, len, level);
|
---|
| 2470 | else
|
---|
| 2471 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2472 | break;
|
---|
| 2473 | #endif
|
---|
| 2474 |
|
---|
| 2475 | case NT_CTYPE:
|
---|
| 2476 | *len = 1;
|
---|
| 2477 | break;
|
---|
| 2478 |
|
---|
| 2479 | case NT_CCLASS:
|
---|
| 2480 | case NT_CANY:
|
---|
| 2481 | *len = 1;
|
---|
| 2482 | break;
|
---|
| 2483 |
|
---|
| 2484 | case NT_ENCLOSE:
|
---|
| 2485 | {
|
---|
| 2486 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2487 | switch (en->type) {
|
---|
| 2488 | case ENCLOSE_MEMORY:
|
---|
| 2489 | #ifdef USE_SUBEXP_CALL
|
---|
| 2490 | if (IS_ENCLOSE_CLEN_FIXED(en))
|
---|
| 2491 | *len = en->char_len;
|
---|
| 2492 | else {
|
---|
| 2493 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
| 2494 | if (r == 0) {
|
---|
| 2495 | en->char_len = *len;
|
---|
| 2496 | SET_ENCLOSE_STATUS(node, NST_CLEN_FIXED);
|
---|
| 2497 | }
|
---|
| 2498 | }
|
---|
| 2499 | break;
|
---|
| 2500 | #endif
|
---|
| 2501 | case ENCLOSE_OPTION:
|
---|
| 2502 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2503 | case ENCLOSE_CONDITION:
|
---|
| 2504 | r = get_char_length_tree1(en->target, reg, len, level);
|
---|
| 2505 | break;
|
---|
| 2506 | default:
|
---|
| 2507 | break;
|
---|
| 2508 | }
|
---|
| 2509 | }
|
---|
| 2510 | break;
|
---|
| 2511 |
|
---|
| 2512 | case NT_ANCHOR:
|
---|
| 2513 | break;
|
---|
| 2514 |
|
---|
| 2515 | default:
|
---|
| 2516 | r = GET_CHAR_LEN_VARLEN;
|
---|
| 2517 | break;
|
---|
| 2518 | }
|
---|
| 2519 |
|
---|
| 2520 | return r;
|
---|
| 2521 | }
|
---|
| 2522 |
|
---|
| 2523 | static int
|
---|
| 2524 | get_char_length_tree(Node* node, regex_t* reg, int* len)
|
---|
| 2525 | {
|
---|
| 2526 | return get_char_length_tree1(node, reg, len, 0);
|
---|
| 2527 | }
|
---|
| 2528 |
|
---|
| 2529 | /* x is not included y ==> 1 : 0 */
|
---|
| 2530 | static int
|
---|
| 2531 | is_not_included(Node* x, Node* y, regex_t* reg)
|
---|
| 2532 | {
|
---|
| 2533 | int i;
|
---|
| 2534 | OnigDistance len;
|
---|
| 2535 | OnigCodePoint code;
|
---|
| 2536 | UChar *p;
|
---|
| 2537 | int ytype;
|
---|
| 2538 |
|
---|
| 2539 | retry:
|
---|
| 2540 | ytype = NTYPE(y);
|
---|
| 2541 | switch (NTYPE(x)) {
|
---|
| 2542 | case NT_CTYPE:
|
---|
| 2543 | {
|
---|
| 2544 | switch (ytype) {
|
---|
| 2545 | case NT_CTYPE:
|
---|
| 2546 | if (NCTYPE(y)->ctype == NCTYPE(x)->ctype &&
|
---|
| 2547 | NCTYPE(y)->not != NCTYPE(x)->not &&
|
---|
| 2548 | NCTYPE(y)->ascii_range == NCTYPE(x)->ascii_range)
|
---|
| 2549 | return 1;
|
---|
| 2550 | else
|
---|
| 2551 | return 0;
|
---|
| 2552 | break;
|
---|
| 2553 |
|
---|
| 2554 | case NT_CCLASS:
|
---|
| 2555 | swap:
|
---|
| 2556 | {
|
---|
| 2557 | Node* tmp;
|
---|
| 2558 | tmp = x; x = y; y = tmp;
|
---|
| 2559 | goto retry;
|
---|
| 2560 | }
|
---|
| 2561 | break;
|
---|
| 2562 |
|
---|
| 2563 | case NT_STR:
|
---|
| 2564 | goto swap;
|
---|
| 2565 | break;
|
---|
| 2566 |
|
---|
| 2567 | default:
|
---|
| 2568 | break;
|
---|
| 2569 | }
|
---|
| 2570 | }
|
---|
| 2571 | break;
|
---|
| 2572 |
|
---|
| 2573 | case NT_CCLASS:
|
---|
| 2574 | {
|
---|
| 2575 | CClassNode* xc = NCCLASS(x);
|
---|
| 2576 | switch (ytype) {
|
---|
| 2577 | case NT_CTYPE:
|
---|
| 2578 | switch (NCTYPE(y)->ctype) {
|
---|
| 2579 | case ONIGENC_CTYPE_WORD:
|
---|
| 2580 | if (NCTYPE(y)->not == 0) {
|
---|
| 2581 | if (IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) {
|
---|
| 2582 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 2583 | if (BITSET_AT(xc->bs, i)) {
|
---|
| 2584 | if (NCTYPE(y)->ascii_range) {
|
---|
| 2585 | if (IS_CODE_SB_WORD(reg->enc, i)) return 0;
|
---|
| 2586 | }
|
---|
| 2587 | else {
|
---|
| 2588 | if (ONIGENC_IS_CODE_WORD(reg->enc, i)) return 0;
|
---|
| 2589 | }
|
---|
| 2590 | }
|
---|
| 2591 | }
|
---|
| 2592 | return 1;
|
---|
| 2593 | }
|
---|
| 2594 | return 0;
|
---|
| 2595 | }
|
---|
| 2596 | else {
|
---|
| 2597 | if (IS_NOT_NULL(xc->mbuf)) return 0;
|
---|
| 2598 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 2599 | int is_word;
|
---|
| 2600 | if (NCTYPE(y)->ascii_range)
|
---|
| 2601 | is_word = IS_CODE_SB_WORD(reg->enc, i);
|
---|
| 2602 | else
|
---|
| 2603 | is_word = ONIGENC_IS_CODE_WORD(reg->enc, i);
|
---|
| 2604 | if (! is_word) {
|
---|
| 2605 | if (!IS_NCCLASS_NOT(xc)) {
|
---|
| 2606 | if (BITSET_AT(xc->bs, i))
|
---|
| 2607 | return 0;
|
---|
| 2608 | }
|
---|
| 2609 | else {
|
---|
| 2610 | if (! BITSET_AT(xc->bs, i))
|
---|
| 2611 | return 0;
|
---|
| 2612 | }
|
---|
| 2613 | }
|
---|
| 2614 | }
|
---|
| 2615 | return 1;
|
---|
| 2616 | }
|
---|
| 2617 | break;
|
---|
| 2618 |
|
---|
| 2619 | default:
|
---|
| 2620 | break;
|
---|
| 2621 | }
|
---|
| 2622 | break;
|
---|
| 2623 |
|
---|
| 2624 | case NT_CCLASS:
|
---|
| 2625 | {
|
---|
| 2626 | int v;
|
---|
| 2627 | CClassNode* yc = NCCLASS(y);
|
---|
| 2628 |
|
---|
| 2629 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 2630 | v = BITSET_AT(xc->bs, i);
|
---|
| 2631 | if ((v != 0 && !IS_NCCLASS_NOT(xc)) ||
|
---|
| 2632 | (v == 0 && IS_NCCLASS_NOT(xc))) {
|
---|
| 2633 | v = BITSET_AT(yc->bs, i);
|
---|
| 2634 | if ((v != 0 && !IS_NCCLASS_NOT(yc)) ||
|
---|
| 2635 | (v == 0 && IS_NCCLASS_NOT(yc)))
|
---|
| 2636 | return 0;
|
---|
| 2637 | }
|
---|
| 2638 | }
|
---|
| 2639 | if ((IS_NULL(xc->mbuf) && !IS_NCCLASS_NOT(xc)) ||
|
---|
| 2640 | (IS_NULL(yc->mbuf) && !IS_NCCLASS_NOT(yc)))
|
---|
| 2641 | return 1;
|
---|
| 2642 | return 0;
|
---|
| 2643 | }
|
---|
| 2644 | break;
|
---|
| 2645 |
|
---|
| 2646 | case NT_STR:
|
---|
| 2647 | goto swap;
|
---|
| 2648 | break;
|
---|
| 2649 |
|
---|
| 2650 | default:
|
---|
| 2651 | break;
|
---|
| 2652 | }
|
---|
| 2653 | }
|
---|
| 2654 | break;
|
---|
| 2655 |
|
---|
| 2656 | case NT_STR:
|
---|
| 2657 | {
|
---|
| 2658 | StrNode* xs = NSTR(x);
|
---|
| 2659 | if (NSTRING_LEN(x) == 0)
|
---|
| 2660 | break;
|
---|
| 2661 |
|
---|
| 2662 | switch (ytype) {
|
---|
| 2663 | case NT_CTYPE:
|
---|
| 2664 | switch (NCTYPE(y)->ctype) {
|
---|
| 2665 | case ONIGENC_CTYPE_WORD:
|
---|
| 2666 | if (NCTYPE(y)->ascii_range) {
|
---|
| 2667 | if (ONIGENC_IS_MBC_ASCII_WORD(reg->enc, xs->s, xs->end))
|
---|
| 2668 | return NCTYPE(y)->not;
|
---|
| 2669 | else
|
---|
| 2670 | return !(NCTYPE(y)->not);
|
---|
| 2671 | }
|
---|
| 2672 | else {
|
---|
| 2673 | if (ONIGENC_IS_MBC_WORD(reg->enc, xs->s, xs->end))
|
---|
| 2674 | return NCTYPE(y)->not;
|
---|
| 2675 | else
|
---|
| 2676 | return !(NCTYPE(y)->not);
|
---|
| 2677 | }
|
---|
| 2678 | break;
|
---|
| 2679 | default:
|
---|
| 2680 | break;
|
---|
| 2681 | }
|
---|
| 2682 | break;
|
---|
| 2683 |
|
---|
| 2684 | case NT_CCLASS:
|
---|
| 2685 | {
|
---|
| 2686 | CClassNode* cc = NCCLASS(y);
|
---|
| 2687 |
|
---|
| 2688 | code = ONIGENC_MBC_TO_CODE(reg->enc, xs->s,
|
---|
| 2689 | xs->s + ONIGENC_MBC_MAXLEN(reg->enc));
|
---|
| 2690 | return (onig_is_code_in_cc(reg->enc, code, cc) != 0 ? 0 : 1);
|
---|
| 2691 | }
|
---|
| 2692 | break;
|
---|
| 2693 |
|
---|
| 2694 | case NT_STR:
|
---|
| 2695 | {
|
---|
| 2696 | UChar *q;
|
---|
| 2697 | StrNode* ys = NSTR(y);
|
---|
| 2698 | len = NSTRING_LEN(x);
|
---|
| 2699 | if (len > NSTRING_LEN(y)) len = NSTRING_LEN(y);
|
---|
| 2700 | if (NSTRING_IS_AMBIG(x) || NSTRING_IS_AMBIG(y)) {
|
---|
| 2701 | /* tiny version */
|
---|
| 2702 | return 0;
|
---|
| 2703 | }
|
---|
| 2704 | else {
|
---|
| 2705 | for (i = 0, p = ys->s, q = xs->s; (OnigDistance )i < len; i++, p++, q++) {
|
---|
| 2706 | if (*p != *q) return 1;
|
---|
| 2707 | }
|
---|
| 2708 | }
|
---|
| 2709 | }
|
---|
| 2710 | break;
|
---|
| 2711 |
|
---|
| 2712 | default:
|
---|
| 2713 | break;
|
---|
| 2714 | }
|
---|
| 2715 | }
|
---|
| 2716 | break;
|
---|
| 2717 |
|
---|
| 2718 | default:
|
---|
| 2719 | break;
|
---|
| 2720 | }
|
---|
| 2721 |
|
---|
| 2722 | return 0;
|
---|
| 2723 | }
|
---|
| 2724 |
|
---|
| 2725 | static Node*
|
---|
| 2726 | get_head_value_node(Node* node, int exact, regex_t* reg)
|
---|
| 2727 | {
|
---|
| 2728 | Node* n = NULL_NODE;
|
---|
| 2729 |
|
---|
| 2730 | switch (NTYPE(node)) {
|
---|
| 2731 | case NT_BREF:
|
---|
| 2732 | case NT_ALT:
|
---|
| 2733 | case NT_CANY:
|
---|
| 2734 | #ifdef USE_SUBEXP_CALL
|
---|
| 2735 | case NT_CALL:
|
---|
| 2736 | #endif
|
---|
| 2737 | break;
|
---|
| 2738 |
|
---|
| 2739 | case NT_CTYPE:
|
---|
| 2740 | case NT_CCLASS:
|
---|
| 2741 | if (exact == 0) {
|
---|
| 2742 | n = node;
|
---|
| 2743 | }
|
---|
| 2744 | break;
|
---|
| 2745 |
|
---|
| 2746 | case NT_LIST:
|
---|
| 2747 | n = get_head_value_node(NCAR(node), exact, reg);
|
---|
| 2748 | break;
|
---|
| 2749 |
|
---|
| 2750 | case NT_STR:
|
---|
| 2751 | {
|
---|
| 2752 | StrNode* sn = NSTR(node);
|
---|
| 2753 |
|
---|
| 2754 | if (sn->end <= sn->s)
|
---|
| 2755 | break;
|
---|
| 2756 |
|
---|
| 2757 | if (exact != 0 &&
|
---|
| 2758 | !NSTRING_IS_RAW(node) && IS_IGNORECASE(reg->options)) {
|
---|
| 2759 | }
|
---|
| 2760 | else {
|
---|
| 2761 | n = node;
|
---|
| 2762 | }
|
---|
| 2763 | }
|
---|
| 2764 | break;
|
---|
| 2765 |
|
---|
| 2766 | case NT_QTFR:
|
---|
| 2767 | {
|
---|
| 2768 | QtfrNode* qn = NQTFR(node);
|
---|
| 2769 | if (qn->lower > 0) {
|
---|
| 2770 | if (IS_NOT_NULL(qn->head_exact))
|
---|
| 2771 | n = qn->head_exact;
|
---|
| 2772 | else
|
---|
| 2773 | n = get_head_value_node(qn->target, exact, reg);
|
---|
| 2774 | }
|
---|
| 2775 | }
|
---|
| 2776 | break;
|
---|
| 2777 |
|
---|
| 2778 | case NT_ENCLOSE:
|
---|
| 2779 | {
|
---|
| 2780 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2781 | switch (en->type) {
|
---|
| 2782 | case ENCLOSE_OPTION:
|
---|
| 2783 | {
|
---|
| 2784 | OnigOptionType options = reg->options;
|
---|
| 2785 |
|
---|
| 2786 | reg->options = NENCLOSE(node)->option;
|
---|
| 2787 | n = get_head_value_node(NENCLOSE(node)->target, exact, reg);
|
---|
| 2788 | reg->options = options;
|
---|
| 2789 | }
|
---|
| 2790 | break;
|
---|
| 2791 |
|
---|
| 2792 | case ENCLOSE_MEMORY:
|
---|
| 2793 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 2794 | case ENCLOSE_CONDITION:
|
---|
| 2795 | n = get_head_value_node(en->target, exact, reg);
|
---|
| 2796 | break;
|
---|
| 2797 | }
|
---|
| 2798 | }
|
---|
| 2799 | break;
|
---|
| 2800 |
|
---|
| 2801 | case NT_ANCHOR:
|
---|
| 2802 | if (NANCHOR(node)->type == ANCHOR_PREC_READ)
|
---|
| 2803 | n = get_head_value_node(NANCHOR(node)->target, exact, reg);
|
---|
| 2804 | break;
|
---|
| 2805 |
|
---|
| 2806 | default:
|
---|
| 2807 | break;
|
---|
| 2808 | }
|
---|
| 2809 |
|
---|
| 2810 | return n;
|
---|
| 2811 | }
|
---|
| 2812 |
|
---|
| 2813 | static int
|
---|
| 2814 | check_type_tree(Node* node, int type_mask, int enclose_mask, int anchor_mask)
|
---|
| 2815 | {
|
---|
| 2816 | int type, r = 0;
|
---|
| 2817 |
|
---|
| 2818 | type = NTYPE(node);
|
---|
| 2819 | if ((NTYPE2BIT(type) & type_mask) == 0)
|
---|
| 2820 | return 1;
|
---|
| 2821 |
|
---|
| 2822 | switch (type) {
|
---|
| 2823 | case NT_LIST:
|
---|
| 2824 | case NT_ALT:
|
---|
| 2825 | do {
|
---|
| 2826 | r = check_type_tree(NCAR(node), type_mask, enclose_mask,
|
---|
| 2827 | anchor_mask);
|
---|
| 2828 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2829 | break;
|
---|
| 2830 |
|
---|
| 2831 | case NT_QTFR:
|
---|
| 2832 | r = check_type_tree(NQTFR(node)->target, type_mask, enclose_mask,
|
---|
| 2833 | anchor_mask);
|
---|
| 2834 | break;
|
---|
| 2835 |
|
---|
| 2836 | case NT_ENCLOSE:
|
---|
| 2837 | {
|
---|
| 2838 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2839 | if ((en->type & enclose_mask) == 0)
|
---|
| 2840 | return 1;
|
---|
| 2841 |
|
---|
| 2842 | r = check_type_tree(en->target, type_mask, enclose_mask, anchor_mask);
|
---|
| 2843 | }
|
---|
| 2844 | break;
|
---|
| 2845 |
|
---|
| 2846 | case NT_ANCHOR:
|
---|
| 2847 | type = NANCHOR(node)->type;
|
---|
| 2848 | if ((type & anchor_mask) == 0)
|
---|
| 2849 | return 1;
|
---|
| 2850 |
|
---|
| 2851 | if (NANCHOR(node)->target)
|
---|
| 2852 | r = check_type_tree(NANCHOR(node)->target,
|
---|
| 2853 | type_mask, enclose_mask, anchor_mask);
|
---|
| 2854 | break;
|
---|
| 2855 |
|
---|
| 2856 | default:
|
---|
| 2857 | break;
|
---|
| 2858 | }
|
---|
| 2859 | return r;
|
---|
| 2860 | }
|
---|
| 2861 |
|
---|
| 2862 | #ifdef USE_SUBEXP_CALL
|
---|
| 2863 |
|
---|
| 2864 | #define RECURSION_EXIST 1
|
---|
| 2865 | #define RECURSION_INFINITE 2
|
---|
| 2866 |
|
---|
| 2867 | static int
|
---|
| 2868 | subexp_inf_recursive_check(Node* node, ScanEnv* env, int head)
|
---|
| 2869 | {
|
---|
| 2870 | int type;
|
---|
| 2871 | int r = 0;
|
---|
| 2872 |
|
---|
| 2873 | type = NTYPE(node);
|
---|
| 2874 | switch (type) {
|
---|
| 2875 | case NT_LIST:
|
---|
| 2876 | {
|
---|
| 2877 | Node *x;
|
---|
| 2878 | OnigDistance min;
|
---|
| 2879 | int ret;
|
---|
| 2880 |
|
---|
| 2881 | x = node;
|
---|
| 2882 | do {
|
---|
| 2883 | ret = subexp_inf_recursive_check(NCAR(x), env, head);
|
---|
| 2884 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
| 2885 | r |= ret;
|
---|
| 2886 | if (head) {
|
---|
| 2887 | ret = get_min_match_length(NCAR(x), &min, env);
|
---|
| 2888 | if (ret != 0) return ret;
|
---|
| 2889 | if (min != 0) head = 0;
|
---|
| 2890 | }
|
---|
| 2891 | } while (IS_NOT_NULL(x = NCDR(x)));
|
---|
| 2892 | }
|
---|
| 2893 | break;
|
---|
| 2894 |
|
---|
| 2895 | case NT_ALT:
|
---|
| 2896 | {
|
---|
| 2897 | int ret;
|
---|
| 2898 | r = RECURSION_EXIST;
|
---|
| 2899 | do {
|
---|
| 2900 | ret = subexp_inf_recursive_check(NCAR(node), env, head);
|
---|
| 2901 | if (ret < 0 || ret == RECURSION_INFINITE) return ret;
|
---|
| 2902 | r &= ret;
|
---|
| 2903 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2904 | }
|
---|
| 2905 | break;
|
---|
| 2906 |
|
---|
| 2907 | case NT_QTFR:
|
---|
| 2908 | r = subexp_inf_recursive_check(NQTFR(node)->target, env, head);
|
---|
| 2909 | if (r == RECURSION_EXIST) {
|
---|
| 2910 | if (NQTFR(node)->lower == 0) r = 0;
|
---|
| 2911 | }
|
---|
| 2912 | break;
|
---|
| 2913 |
|
---|
| 2914 | case NT_ANCHOR:
|
---|
| 2915 | {
|
---|
| 2916 | AnchorNode* an = NANCHOR(node);
|
---|
| 2917 | switch (an->type) {
|
---|
| 2918 | case ANCHOR_PREC_READ:
|
---|
| 2919 | case ANCHOR_PREC_READ_NOT:
|
---|
| 2920 | case ANCHOR_LOOK_BEHIND:
|
---|
| 2921 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 2922 | r = subexp_inf_recursive_check(an->target, env, head);
|
---|
| 2923 | break;
|
---|
| 2924 | }
|
---|
| 2925 | }
|
---|
| 2926 | break;
|
---|
| 2927 |
|
---|
| 2928 | case NT_CALL:
|
---|
| 2929 | r = subexp_inf_recursive_check(NCALL(node)->target, env, head);
|
---|
| 2930 | break;
|
---|
| 2931 |
|
---|
| 2932 | case NT_ENCLOSE:
|
---|
| 2933 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
| 2934 | return 0;
|
---|
| 2935 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
| 2936 | return (head == 0 ? RECURSION_EXIST : RECURSION_INFINITE);
|
---|
| 2937 | else {
|
---|
| 2938 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 2939 | r = subexp_inf_recursive_check(NENCLOSE(node)->target, env, head);
|
---|
| 2940 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 2941 | }
|
---|
| 2942 | break;
|
---|
| 2943 |
|
---|
| 2944 | default:
|
---|
| 2945 | break;
|
---|
| 2946 | }
|
---|
| 2947 |
|
---|
| 2948 | return r;
|
---|
| 2949 | }
|
---|
| 2950 |
|
---|
| 2951 | static int
|
---|
| 2952 | subexp_inf_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
| 2953 | {
|
---|
| 2954 | int type;
|
---|
| 2955 | int r = 0;
|
---|
| 2956 |
|
---|
| 2957 | type = NTYPE(node);
|
---|
| 2958 | switch (type) {
|
---|
| 2959 | case NT_LIST:
|
---|
| 2960 | case NT_ALT:
|
---|
| 2961 | do {
|
---|
| 2962 | r = subexp_inf_recursive_check_trav(NCAR(node), env);
|
---|
| 2963 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 2964 | break;
|
---|
| 2965 |
|
---|
| 2966 | case NT_QTFR:
|
---|
| 2967 | r = subexp_inf_recursive_check_trav(NQTFR(node)->target, env);
|
---|
| 2968 | break;
|
---|
| 2969 |
|
---|
| 2970 | case NT_ANCHOR:
|
---|
| 2971 | {
|
---|
| 2972 | AnchorNode* an = NANCHOR(node);
|
---|
| 2973 | switch (an->type) {
|
---|
| 2974 | case ANCHOR_PREC_READ:
|
---|
| 2975 | case ANCHOR_PREC_READ_NOT:
|
---|
| 2976 | case ANCHOR_LOOK_BEHIND:
|
---|
| 2977 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 2978 | r = subexp_inf_recursive_check_trav(an->target, env);
|
---|
| 2979 | break;
|
---|
| 2980 | }
|
---|
| 2981 | }
|
---|
| 2982 | break;
|
---|
| 2983 |
|
---|
| 2984 | case NT_ENCLOSE:
|
---|
| 2985 | {
|
---|
| 2986 | EncloseNode* en = NENCLOSE(node);
|
---|
| 2987 |
|
---|
| 2988 | if (IS_ENCLOSE_RECURSION(en)) {
|
---|
| 2989 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 2990 | r = subexp_inf_recursive_check(en->target, env, 1);
|
---|
| 2991 | if (r > 0) return ONIGERR_NEVER_ENDING_RECURSION;
|
---|
| 2992 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 2993 | }
|
---|
| 2994 | r = subexp_inf_recursive_check_trav(en->target, env);
|
---|
| 2995 | }
|
---|
| 2996 |
|
---|
| 2997 | break;
|
---|
| 2998 |
|
---|
| 2999 | default:
|
---|
| 3000 | break;
|
---|
| 3001 | }
|
---|
| 3002 |
|
---|
| 3003 | return r;
|
---|
| 3004 | }
|
---|
| 3005 |
|
---|
| 3006 | static int
|
---|
| 3007 | subexp_recursive_check(Node* node)
|
---|
| 3008 | {
|
---|
| 3009 | int r = 0;
|
---|
| 3010 |
|
---|
| 3011 | switch (NTYPE(node)) {
|
---|
| 3012 | case NT_LIST:
|
---|
| 3013 | case NT_ALT:
|
---|
| 3014 | do {
|
---|
| 3015 | r |= subexp_recursive_check(NCAR(node));
|
---|
| 3016 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3017 | break;
|
---|
| 3018 |
|
---|
| 3019 | case NT_QTFR:
|
---|
| 3020 | r = subexp_recursive_check(NQTFR(node)->target);
|
---|
| 3021 | break;
|
---|
| 3022 |
|
---|
| 3023 | case NT_ANCHOR:
|
---|
| 3024 | {
|
---|
| 3025 | AnchorNode* an = NANCHOR(node);
|
---|
| 3026 | switch (an->type) {
|
---|
| 3027 | case ANCHOR_PREC_READ:
|
---|
| 3028 | case ANCHOR_PREC_READ_NOT:
|
---|
| 3029 | case ANCHOR_LOOK_BEHIND:
|
---|
| 3030 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 3031 | r = subexp_recursive_check(an->target);
|
---|
| 3032 | break;
|
---|
| 3033 | }
|
---|
| 3034 | }
|
---|
| 3035 | break;
|
---|
| 3036 |
|
---|
| 3037 | case NT_CALL:
|
---|
| 3038 | r = subexp_recursive_check(NCALL(node)->target);
|
---|
| 3039 | if (r != 0) SET_CALL_RECURSION(node);
|
---|
| 3040 | break;
|
---|
| 3041 |
|
---|
| 3042 | case NT_ENCLOSE:
|
---|
| 3043 | if (IS_ENCLOSE_MARK2(NENCLOSE(node)))
|
---|
| 3044 | return 0;
|
---|
| 3045 | else if (IS_ENCLOSE_MARK1(NENCLOSE(node)))
|
---|
| 3046 | return 1; /* recursion */
|
---|
| 3047 | else {
|
---|
| 3048 | SET_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 3049 | r = subexp_recursive_check(NENCLOSE(node)->target);
|
---|
| 3050 | CLEAR_ENCLOSE_STATUS(node, NST_MARK2);
|
---|
| 3051 | }
|
---|
| 3052 | break;
|
---|
| 3053 |
|
---|
| 3054 | default:
|
---|
| 3055 | break;
|
---|
| 3056 | }
|
---|
| 3057 |
|
---|
| 3058 | return r;
|
---|
| 3059 | }
|
---|
| 3060 |
|
---|
| 3061 |
|
---|
| 3062 | static int
|
---|
| 3063 | subexp_recursive_check_trav(Node* node, ScanEnv* env)
|
---|
| 3064 | {
|
---|
| 3065 | #define FOUND_CALLED_NODE 1
|
---|
| 3066 |
|
---|
| 3067 | int type;
|
---|
| 3068 | int r = 0;
|
---|
| 3069 |
|
---|
| 3070 | type = NTYPE(node);
|
---|
| 3071 | switch (type) {
|
---|
| 3072 | case NT_LIST:
|
---|
| 3073 | case NT_ALT:
|
---|
| 3074 | {
|
---|
| 3075 | int ret;
|
---|
| 3076 | do {
|
---|
| 3077 | ret = subexp_recursive_check_trav(NCAR(node), env);
|
---|
| 3078 | if (ret == FOUND_CALLED_NODE) r = FOUND_CALLED_NODE;
|
---|
| 3079 | else if (ret < 0) return ret;
|
---|
| 3080 | } while (IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3081 | }
|
---|
| 3082 | break;
|
---|
| 3083 |
|
---|
| 3084 | case NT_QTFR:
|
---|
| 3085 | r = subexp_recursive_check_trav(NQTFR(node)->target, env);
|
---|
| 3086 | if (NQTFR(node)->upper == 0) {
|
---|
| 3087 | if (r == FOUND_CALLED_NODE)
|
---|
| 3088 | NQTFR(node)->is_refered = 1;
|
---|
| 3089 | }
|
---|
| 3090 | break;
|
---|
| 3091 |
|
---|
| 3092 | case NT_ANCHOR:
|
---|
| 3093 | {
|
---|
| 3094 | AnchorNode* an = NANCHOR(node);
|
---|
| 3095 | switch (an->type) {
|
---|
| 3096 | case ANCHOR_PREC_READ:
|
---|
| 3097 | case ANCHOR_PREC_READ_NOT:
|
---|
| 3098 | case ANCHOR_LOOK_BEHIND:
|
---|
| 3099 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 3100 | r = subexp_recursive_check_trav(an->target, env);
|
---|
| 3101 | break;
|
---|
| 3102 | }
|
---|
| 3103 | }
|
---|
| 3104 | break;
|
---|
| 3105 |
|
---|
| 3106 | case NT_ENCLOSE:
|
---|
| 3107 | {
|
---|
| 3108 | EncloseNode* en = NENCLOSE(node);
|
---|
| 3109 |
|
---|
| 3110 | if (! IS_ENCLOSE_RECURSION(en)) {
|
---|
| 3111 | if (IS_ENCLOSE_CALLED(en)) {
|
---|
| 3112 | SET_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 3113 | r = subexp_recursive_check(en->target);
|
---|
| 3114 | if (r != 0) SET_ENCLOSE_STATUS(node, NST_RECURSION);
|
---|
| 3115 | CLEAR_ENCLOSE_STATUS(node, NST_MARK1);
|
---|
| 3116 | }
|
---|
| 3117 | }
|
---|
| 3118 | r = subexp_recursive_check_trav(en->target, env);
|
---|
| 3119 | if (IS_ENCLOSE_CALLED(en))
|
---|
| 3120 | r |= FOUND_CALLED_NODE;
|
---|
| 3121 | }
|
---|
| 3122 | break;
|
---|
| 3123 |
|
---|
| 3124 | default:
|
---|
| 3125 | break;
|
---|
| 3126 | }
|
---|
| 3127 |
|
---|
| 3128 | return r;
|
---|
| 3129 | }
|
---|
| 3130 |
|
---|
/*
 * Resolve every call node in the tree to the group node it refers to.
 *
 * For each NT_CALL node the target is looked up in the scan environment's
 * memory-node table (by number, or by name when USE_NAMED_GROUP is
 * defined), the target is flagged NST_CALLED, the group's bit in
 * env->bt_mem_start is turned on, and env->unset_addr_list is copied into
 * the call node.
 *
 * Returns 0 on success or a negative ONIGERR_* code; for the reference
 * errors the offending name is also recorded through
 * onig_scan_env_set_error_string().
 */
static int
setup_subexp_call(Node* node, ScanEnv* env)
{
  int type;
  int r = 0;

  type = NTYPE(node);
  switch (type) {
  case NT_LIST:
    /* visit each element, stopping at the first error */
    do {
      r = setup_subexp_call(NCAR(node), env);
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_ALT:
    /* same walk as for a list */
    do {
      r = setup_subexp_call(NCAR(node), env);
    } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
    break;

  case NT_QTFR:
    r = setup_subexp_call(NQTFR(node)->target, env);
    break;
  case NT_ENCLOSE:
    r = setup_subexp_call(NENCLOSE(node)->target, env);
    break;

  case NT_CALL:
    {
      CallNode* cn = NCALL(node);
      Node** nodes = SCANENV_MEM_NODES(env);

      if (cn->group_num != 0) {
        /* call by group number */
        int gnum = cn->group_num;

#ifdef USE_NAMED_GROUP
        /* numbered calls are rejected when named groups exist, the syntax
           captures only named groups, and the capture-group option is off */
        if (env->num_named > 0 &&
            IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
            !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
          return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
        }
#endif
        if (gnum > env->num_mem) {
          onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_GROUP_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_GROUP_REFERENCE;
        }

#ifdef USE_NAMED_GROUP
      /* the name-based branches below jump here once cn->group_num is known */
      set_call_attr:
#endif
        cn->target = nodes[cn->group_num];
        if (IS_NULL(cn->target)) {
          onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_NAME_REFERENCE;
        }
        SET_ENCLOSE_STATUS(cn->target, NST_CALLED);
        BIT_STATUS_ON_AT(env->bt_mem_start, cn->group_num);
        cn->unset_addr_list = env->unset_addr_list;
      }
#ifdef USE_NAMED_GROUP
#ifdef USE_PERL_SUBEXP_CALL
      else if (cn->name == cn->name_end) {
        /* group number 0 with an empty name: resolve through nodes[0] */
        goto set_call_attr;
      }
#endif
      else {
        /* call by name: translate the name into a group number first */
        int *refs;

        int n = onig_name_to_group_numbers(env->reg, cn->name, cn->name_end,
                                           &refs);
        if (n <= 0) {
          onig_scan_env_set_error_string(env,
                 ONIGERR_UNDEFINED_NAME_REFERENCE, cn->name, cn->name_end);
          return ONIGERR_UNDEFINED_NAME_REFERENCE;
        }
        else if (n > 1 &&
            ! IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_MULTIPLEX_DEFINITION_NAME_CALL)) {
          /* the name maps to several groups and the syntax forbids that */
          onig_scan_env_set_error_string(env,
                 ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL, cn->name, cn->name_end);
          return ONIGERR_MULTIPLEX_DEFINITION_NAME_CALL;
        }
        else {
          /* use the first group number returned for the name */
          cn->group_num = refs[0];
          goto set_call_attr;
        }
      }
#endif
    }
    break;

  case NT_ANCHOR:
    {
      AnchorNode* an = NANCHOR(node);

      /* only these anchor types are descended into */
      switch (an->type) {
      case ANCHOR_PREC_READ:
      case ANCHOR_PREC_READ_NOT:
      case ANCHOR_LOOK_BEHIND:
      case ANCHOR_LOOK_BEHIND_NOT:
        r = setup_subexp_call(an->target, env);
        break;
      }
    }
    break;

  default:
    break;
  }

  return r;
}
| 3244 | #endif
|
---|
| 3245 |
|
---|
| 3246 | /* divide different length alternatives in look-behind.
|
---|
| 3247 | (?<=A|B) ==> (?<=A)|(?<=B)
|
---|
| 3248 | (?<!A|B) ==> (?<!A)(?<!B)
|
---|
| 3249 | */
|
---|
| 3250 | static int
|
---|
| 3251 | divide_look_behind_alternatives(Node* node)
|
---|
| 3252 | {
|
---|
| 3253 | Node *head, *np, *insert_node;
|
---|
| 3254 | AnchorNode* an = NANCHOR(node);
|
---|
| 3255 | int anc_type = an->type;
|
---|
| 3256 |
|
---|
| 3257 | head = an->target;
|
---|
| 3258 | np = NCAR(head);
|
---|
| 3259 | swap_node(node, head);
|
---|
| 3260 | NCAR(node) = head;
|
---|
| 3261 | NANCHOR(head)->target = np;
|
---|
| 3262 |
|
---|
| 3263 | np = node;
|
---|
| 3264 | while ((np = NCDR(np)) != NULL_NODE) {
|
---|
| 3265 | insert_node = onig_node_new_anchor(anc_type);
|
---|
| 3266 | CHECK_NULL_RETURN_MEMERR(insert_node);
|
---|
| 3267 | NANCHOR(insert_node)->target = NCAR(np);
|
---|
| 3268 | NCAR(np) = insert_node;
|
---|
| 3269 | }
|
---|
| 3270 |
|
---|
| 3271 | if (anc_type == ANCHOR_LOOK_BEHIND_NOT) {
|
---|
| 3272 | np = node;
|
---|
| 3273 | do {
|
---|
| 3274 | SET_NTYPE(np, NT_LIST); /* alt -> list */
|
---|
| 3275 | } while ((np = NCDR(np)) != NULL_NODE);
|
---|
| 3276 | }
|
---|
| 3277 | return 0;
|
---|
| 3278 | }
|
---|
| 3279 |
|
---|
| 3280 | static int
|
---|
| 3281 | setup_look_behind(Node* node, regex_t* reg, ScanEnv* env)
|
---|
| 3282 | {
|
---|
| 3283 | int r, len;
|
---|
| 3284 | AnchorNode* an = NANCHOR(node);
|
---|
| 3285 |
|
---|
| 3286 | r = get_char_length_tree(an->target, reg, &len);
|
---|
| 3287 | if (r == 0)
|
---|
| 3288 | an->char_len = len;
|
---|
| 3289 | else if (r == GET_CHAR_LEN_VARLEN)
|
---|
| 3290 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 3291 | else if (r == GET_CHAR_LEN_TOP_ALT_VARLEN) {
|
---|
| 3292 | if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_DIFFERENT_LEN_ALT_LOOK_BEHIND))
|
---|
| 3293 | r = divide_look_behind_alternatives(node);
|
---|
| 3294 | else
|
---|
| 3295 | r = ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 3296 | }
|
---|
| 3297 |
|
---|
| 3298 | return r;
|
---|
| 3299 | }
|
---|
| 3300 |
|
---|
/*
 * Tune `node` using knowledge of the node that follows it.
 *
 * node      : node to optimise; memory enclosures are looked through.
 * next_node : the node that comes after `node`.
 * in_root   : non-zero when the implicit-anchor rewrite may be applied;
 *             cleared as soon as an enclosure is entered.
 * reg       : regex object (encoding and options).
 *
 * Only greedy quantifiers with an infinite upper bound are affected.
 * Returns 0 on success or ONIGERR_MEMORY when a node cannot be allocated.
 */
static int
next_setup(Node* node, Node* next_node, int in_root, regex_t* reg)
{
  int type;

 retry:
  type = NTYPE(node);
  if (type == NT_QTFR) {
    QtfrNode* qn = NQTFR(node);
    if (qn->greedy && IS_REPEAT_INFINITE(qn->upper)) {
#ifdef USE_QTFR_PEEK_NEXT
      /* remember the exact head of what follows in next_head_exact */
      Node* n = get_head_value_node(next_node, 1, reg);
      /* '\0': for UTF-16BE etc... */
      if (IS_NOT_NULL(n) && NSTR(n)->s[0] != '\0') {
        qn->next_head_exact = n;
      }
#endif
      /* automatic possessification a*b ==> (?>a*)b */
      if (qn->lower <= 1) {
        int ttype = NTYPE(qn->target);
        if (IS_NODE_TYPE_SIMPLE(ttype)) {
          Node *x, *y;
          x = get_head_value_node(qn->target, 0, reg);
          if (IS_NOT_NULL(x)) {
            y = get_head_value_node(next_node, 0, reg);
            /* applied only when is_not_included() accepts the two heads */
            if (IS_NOT_NULL(y) && is_not_included(x, y, reg)) {
              Node* en = onig_node_new_enclose(ENCLOSE_STOP_BACKTRACK);
              CHECK_NULL_RETURN_MEMERR(en);
              SET_ENCLOSE_STATUS(en, NST_STOP_BT_SIMPLE_REPEAT);
              /* NOTE(review): swap_node presumably exchanges the contents of
                 the two nodes, leaving `node` as the enclosure and `en` as
                 the quantifier - confirm against its definition */
              swap_node(node, en);
              NENCLOSE(node)->target = en;
            }
          }
        }
      }

#ifndef ONIG_DONT_OPTIMIZE
      if (NTYPE(node) == NT_QTFR && /* the type may be changed by above block */
          in_root && /* qn->lower == 0 && */
          NTYPE(qn->target) == NT_CANY &&
          ! IS_MULTILINE(reg->options)) {
        /* implicit anchor: /.*a/ ==> /(?:^|\G).*a/ */
        Node *np;
        np = onig_node_new_list(NULL_NODE, NULL_NODE);
        CHECK_NULL_RETURN_MEMERR(np);
        swap_node(node, np);
        /* `node` becomes a two-cell list; its second cell holds `np` */
        NCDR(node) = onig_node_new_list(np, NULL_NODE);
        if (IS_NULL(NCDR(node))) {
          onig_node_free(np);
          return ONIGERR_MEMORY;
        }
        np = onig_node_new_anchor(ANCHOR_ANYCHAR_STAR); /* (?:^|\G) */
        CHECK_NULL_RETURN_MEMERR(np);
        NCAR(node) = np;
      }
#endif
    }
  }
  else if (type == NT_ENCLOSE) {
    EncloseNode* en = NENCLOSE(node);
    in_root = 0;
    /* look through a memory (capture) enclosure and retry on its body */
    if (en->type == ENCLOSE_MEMORY) {
      node = en->target;
      goto retry;
    }
  }
  return 0;
}
| 3369 |
|
---|
| 3370 |
|
---|
| 3371 | static int
|
---|
| 3372 | update_string_node_case_fold(regex_t* reg, Node *node)
|
---|
| 3373 | {
|
---|
| 3374 | UChar *p, *end, buf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
| 3375 | UChar *sbuf, *ebuf, *sp;
|
---|
| 3376 | int r, i, len;
|
---|
| 3377 | OnigDistance sbuf_size;
|
---|
| 3378 | StrNode* sn = NSTR(node);
|
---|
| 3379 |
|
---|
| 3380 | end = sn->end;
|
---|
| 3381 | sbuf_size = (end - sn->s) * 2;
|
---|
| 3382 | sbuf = (UChar* )xmalloc(sbuf_size);
|
---|
| 3383 | CHECK_NULL_RETURN_MEMERR(sbuf);
|
---|
| 3384 | ebuf = sbuf + sbuf_size;
|
---|
| 3385 |
|
---|
| 3386 | sp = sbuf;
|
---|
| 3387 | p = sn->s;
|
---|
| 3388 | while (p < end) {
|
---|
| 3389 | len = ONIGENC_MBC_CASE_FOLD(reg->enc, reg->case_fold_flag, &p, end, buf);
|
---|
| 3390 | for (i = 0; i < len; i++) {
|
---|
| 3391 | if (sp >= ebuf) {
|
---|
| 3392 | UChar* p = (UChar* )xrealloc(sbuf, sbuf_size * 2);
|
---|
| 3393 | if (IS_NULL(p)) {
|
---|
| 3394 | xfree(sbuf);
|
---|
| 3395 | return ONIGERR_MEMORY;
|
---|
| 3396 | }
|
---|
| 3397 | sbuf = p;
|
---|
| 3398 | sp = sbuf + sbuf_size;
|
---|
| 3399 | sbuf_size *= 2;
|
---|
| 3400 | ebuf = sbuf + sbuf_size;
|
---|
| 3401 | }
|
---|
| 3402 |
|
---|
| 3403 | *sp++ = buf[i];
|
---|
| 3404 | }
|
---|
| 3405 | }
|
---|
| 3406 |
|
---|
| 3407 | r = onig_node_str_set(node, sbuf, sp);
|
---|
| 3408 |
|
---|
| 3409 | xfree(sbuf);
|
---|
| 3410 | return r;
|
---|
| 3411 | }
|
---|
| 3412 |
|
---|
| 3413 | static int
|
---|
| 3414 | expand_case_fold_make_rem_string(Node** rnode, UChar *s, UChar *end,
|
---|
| 3415 | regex_t* reg)
|
---|
| 3416 | {
|
---|
| 3417 | int r;
|
---|
| 3418 | Node *node;
|
---|
| 3419 |
|
---|
| 3420 | node = onig_node_new_str(s, end);
|
---|
| 3421 | if (IS_NULL(node)) return ONIGERR_MEMORY;
|
---|
| 3422 |
|
---|
| 3423 | r = update_string_node_case_fold(reg, node);
|
---|
| 3424 | if (r != 0) {
|
---|
| 3425 | onig_node_free(node);
|
---|
| 3426 | return r;
|
---|
| 3427 | }
|
---|
| 3428 |
|
---|
| 3429 | NSTRING_SET_AMBIG(node);
|
---|
| 3430 | NSTRING_SET_DONT_GET_OPT_INFO(node);
|
---|
| 3431 | *rnode = node;
|
---|
| 3432 | return 0;
|
---|
| 3433 | }
|
---|
| 3434 |
|
---|
| 3435 | static int
|
---|
| 3436 | is_case_fold_variable_len(int item_num, OnigCaseFoldCodeItem items[],
|
---|
| 3437 | int slen)
|
---|
| 3438 | {
|
---|
| 3439 | int i;
|
---|
| 3440 |
|
---|
| 3441 | for (i = 0; i < item_num; i++) {
|
---|
| 3442 | if (items[i].byte_len != slen) {
|
---|
| 3443 | return 1;
|
---|
| 3444 | }
|
---|
| 3445 | if (items[i].code_len != 1) {
|
---|
| 3446 | return 1;
|
---|
| 3447 | }
|
---|
| 3448 | }
|
---|
| 3449 | return 0;
|
---|
| 3450 | }
|
---|
| 3451 |
|
---|
/*
 * Build the alternation that stands for one character and its case-fold
 * variants.
 *
 * item_num, items : fold candidates for the character at `p`.
 * p, slen         : the source character and its byte length.
 * end             : end of the whole source string.
 * reg             : regex object (encoding and fold flags).
 * rnode           : receives the root of the tree that was built.
 *
 * When every item has byte_len == slen, the result is a flat alternation:
 * the original character followed by one string per item. Otherwise the
 * root is an outer alternation whose first branch is a list holding that
 * flat alternation; each item with a different byte length becomes a
 * further outer branch and, when source text remains after it, is followed
 * in a list by the case-folded remainder of the string.
 *
 * Returns 1 for the variable-length form, 0 for the flat form, and
 * ONIGERR_MEMORY on any failure.
 */
static int
expand_case_fold_string_alt(int item_num, OnigCaseFoldCodeItem items[],
                            UChar *p, int slen, UChar *end,
                            regex_t* reg, Node **rnode)
{
  int r, i, j, len, varlen;
  Node *anode, *var_anode, *snode, *xnode, *an;
  UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];

  *rnode = var_anode = NULL_NODE;

  /* does any item consume a different number of source bytes? */
  varlen = 0;
  for (i = 0; i < item_num; i++) {
    if (items[i].byte_len != slen) {
      varlen = 1;
      break;
    }
  }

  if (varlen != 0) {
    /* outer alt -> list -> inner alt */
    *rnode = var_anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
    if (IS_NULL(var_anode)) return ONIGERR_MEMORY;

    xnode = onig_node_new_list(NULL, NULL);
    if (IS_NULL(xnode)) goto mem_err;
    NCAR(var_anode) = xnode;

    anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
    if (IS_NULL(anode)) goto mem_err;
    NCAR(xnode) = anode;
  }
  else {
    *rnode = anode = onig_node_new_alt(NULL_NODE, NULL_NODE);
    if (IS_NULL(anode)) return ONIGERR_MEMORY;
  }

  /* first alternative: the source character itself */
  snode = onig_node_new_str(p, p + slen);
  if (IS_NULL(snode)) goto mem_err;

  NCAR(anode) = snode;

  for (i = 0; i < item_num; i++) {
    /* until it is linked into the tree, snode is owned by this loop and is
       released through mem_err2 on failure */
    snode = onig_node_new_str(NULL, NULL);
    if (IS_NULL(snode)) goto mem_err;

    /* encode each code point of the item and append it to the string */
    for (j = 0; j < items[i].code_len; j++) {
      len = ONIGENC_CODE_TO_MBC(reg->enc, items[i].code[j], buf);
      if (len < 0) {
        /* NOTE(review): r is assigned here, but both error labels return
           ONIGERR_MEMORY, so this code never reaches the caller */
        r = len;
        goto mem_err2;
      }

      r = onig_node_str_cat(snode, buf, buf + len);
      if (r != 0) goto mem_err2;
    }

    an = onig_node_new_alt(NULL_NODE, NULL_NODE);
    if (IS_NULL(an)) {
      goto mem_err2;
    }

    if (items[i].byte_len != slen) {
      /* this item goes on the outer alternation */
      Node *rem;
      UChar *q = p + items[i].byte_len;

      if (q < end) {
        /* source text remains after the item: fold it and chain it on */
        r = expand_case_fold_make_rem_string(&rem, q, end, reg);
        if (r != 0) {
          onig_node_free(an);
          goto mem_err2;
        }

        xnode = onig_node_list_add(NULL_NODE, snode);
        if (IS_NULL(xnode)) {
          onig_node_free(an);
          onig_node_free(rem);
          goto mem_err2;
        }
        if (IS_NULL(onig_node_list_add(xnode, rem))) {
          onig_node_free(an);
          /* snode was passed to onig_node_list_add() for xnode above, so it
             is not freed separately: jump to mem_err, not mem_err2 */
          onig_node_free(xnode);
          onig_node_free(rem);
          goto mem_err;
        }

        NCAR(an) = xnode;
      }
      else {
        NCAR(an) = snode;
      }

      NCDR(var_anode) = an;
      var_anode = an;
    }
    else {
      /* same byte length: append to the inner (flat) alternation */
      NCAR(an) = snode;
      NCDR(anode) = an;
      anode = an;
    }
  }

  return varlen;

 mem_err2:
  /* snode has not been linked into the tree yet */
  onig_node_free(snode);

 mem_err:
  /* release everything reachable from the root built so far */
  onig_node_free(*rnode);

  return ONIGERR_MEMORY;
}
| 3563 |
|
---|
| 3564 | static int
|
---|
| 3565 | expand_case_fold_string(Node* node, regex_t* reg)
|
---|
| 3566 | {
|
---|
| 3567 | #define THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION 8
|
---|
| 3568 |
|
---|
| 3569 | int r, n, len, alt_num;
|
---|
| 3570 | int varlen = 0;
|
---|
| 3571 | UChar *start, *end, *p;
|
---|
| 3572 | Node *top_root, *root, *snode, *prev_node;
|
---|
| 3573 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 3574 | StrNode* sn = NSTR(node);
|
---|
| 3575 |
|
---|
| 3576 | if (NSTRING_IS_AMBIG(node)) return 0;
|
---|
| 3577 |
|
---|
| 3578 | start = sn->s;
|
---|
| 3579 | end = sn->end;
|
---|
| 3580 | if (start >= end) return 0;
|
---|
| 3581 |
|
---|
| 3582 | r = 0;
|
---|
| 3583 | top_root = root = prev_node = snode = NULL_NODE;
|
---|
| 3584 | alt_num = 1;
|
---|
| 3585 | p = start;
|
---|
| 3586 | while (p < end) {
|
---|
| 3587 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(reg->enc, reg->case_fold_flag,
|
---|
| 3588 | p, end, items);
|
---|
| 3589 | if (n < 0) {
|
---|
| 3590 | r = n;
|
---|
| 3591 | goto err;
|
---|
| 3592 | }
|
---|
| 3593 |
|
---|
| 3594 | len = enclen(reg->enc, p);
|
---|
| 3595 |
|
---|
| 3596 | varlen = is_case_fold_variable_len(n, items, len);
|
---|
| 3597 | if (n == 0 || varlen == 0) {
|
---|
| 3598 | if (IS_NULL(snode)) {
|
---|
| 3599 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
| 3600 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
| 3601 | if (IS_NULL(root)) {
|
---|
| 3602 | onig_node_free(prev_node);
|
---|
| 3603 | goto mem_err;
|
---|
| 3604 | }
|
---|
| 3605 | }
|
---|
| 3606 |
|
---|
| 3607 | prev_node = snode = onig_node_new_str(NULL, NULL);
|
---|
| 3608 | if (IS_NULL(snode)) goto mem_err;
|
---|
| 3609 | if (IS_NOT_NULL(root)) {
|
---|
| 3610 | if (IS_NULL(onig_node_list_add(root, snode))) {
|
---|
| 3611 | onig_node_free(snode);
|
---|
| 3612 | goto mem_err;
|
---|
| 3613 | }
|
---|
| 3614 | }
|
---|
| 3615 | }
|
---|
| 3616 |
|
---|
| 3617 | r = onig_node_str_cat(snode, p, p + len);
|
---|
| 3618 | if (r != 0) goto err;
|
---|
| 3619 | }
|
---|
| 3620 | else {
|
---|
| 3621 | alt_num *= (n + 1);
|
---|
| 3622 | if (alt_num > THRESHOLD_CASE_FOLD_ALT_FOR_EXPANSION) break;
|
---|
| 3623 |
|
---|
| 3624 | if (IS_NOT_NULL(snode)) {
|
---|
| 3625 | r = update_string_node_case_fold(reg, snode);
|
---|
| 3626 | if (r == 0) {
|
---|
| 3627 | NSTRING_SET_AMBIG(snode);
|
---|
| 3628 | }
|
---|
| 3629 | }
|
---|
| 3630 | if (IS_NULL(root) && IS_NOT_NULL(prev_node)) {
|
---|
| 3631 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
| 3632 | if (IS_NULL(root)) {
|
---|
| 3633 | onig_node_free(prev_node);
|
---|
| 3634 | goto mem_err;
|
---|
| 3635 | }
|
---|
| 3636 | }
|
---|
| 3637 |
|
---|
| 3638 | r = expand_case_fold_string_alt(n, items, p, len, end, reg, &prev_node);
|
---|
| 3639 | if (r < 0) goto mem_err;
|
---|
| 3640 | if (r == 1) {
|
---|
| 3641 | if (IS_NULL(root)) {
|
---|
| 3642 | top_root = prev_node;
|
---|
| 3643 | }
|
---|
| 3644 | else {
|
---|
| 3645 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
| 3646 | onig_node_free(prev_node);
|
---|
| 3647 | goto mem_err;
|
---|
| 3648 | }
|
---|
| 3649 | }
|
---|
| 3650 |
|
---|
| 3651 | root = NCAR(prev_node);
|
---|
| 3652 | }
|
---|
| 3653 | else { /* r == 0 */
|
---|
| 3654 | if (IS_NOT_NULL(root)) {
|
---|
| 3655 | if (IS_NULL(onig_node_list_add(root, prev_node))) {
|
---|
| 3656 | onig_node_free(prev_node);
|
---|
| 3657 | goto mem_err;
|
---|
| 3658 | }
|
---|
| 3659 | }
|
---|
| 3660 | }
|
---|
| 3661 |
|
---|
| 3662 | snode = NULL_NODE;
|
---|
| 3663 | }
|
---|
| 3664 |
|
---|
| 3665 | p += len;
|
---|
| 3666 | }
|
---|
| 3667 | if (IS_NOT_NULL(snode)) {
|
---|
| 3668 | r = update_string_node_case_fold(reg, snode);
|
---|
| 3669 | if (r == 0) {
|
---|
| 3670 | NSTRING_SET_AMBIG(snode);
|
---|
| 3671 | }
|
---|
| 3672 | }
|
---|
| 3673 |
|
---|
| 3674 | if (p < end) {
|
---|
| 3675 | Node *srem;
|
---|
| 3676 |
|
---|
| 3677 | r = expand_case_fold_make_rem_string(&srem, p, end, reg);
|
---|
| 3678 | if (r != 0) goto mem_err;
|
---|
| 3679 |
|
---|
| 3680 | if (IS_NOT_NULL(prev_node) && IS_NULL(root)) {
|
---|
| 3681 | top_root = root = onig_node_list_add(NULL_NODE, prev_node);
|
---|
| 3682 | if (IS_NULL(root)) {
|
---|
| 3683 | onig_node_free(srem);
|
---|
| 3684 | onig_node_free(prev_node);
|
---|
| 3685 | goto mem_err;
|
---|
| 3686 | }
|
---|
| 3687 | }
|
---|
| 3688 |
|
---|
| 3689 | if (IS_NULL(root)) {
|
---|
| 3690 | prev_node = srem;
|
---|
| 3691 | }
|
---|
| 3692 | else {
|
---|
| 3693 | if (IS_NULL(onig_node_list_add(root, srem))) {
|
---|
| 3694 | onig_node_free(srem);
|
---|
| 3695 | goto mem_err;
|
---|
| 3696 | }
|
---|
| 3697 | }
|
---|
| 3698 | }
|
---|
| 3699 |
|
---|
| 3700 | /* ending */
|
---|
| 3701 | top_root = (IS_NOT_NULL(top_root) ? top_root : prev_node);
|
---|
| 3702 | swap_node(node, top_root);
|
---|
| 3703 | onig_node_free(top_root);
|
---|
| 3704 | return 0;
|
---|
| 3705 |
|
---|
| 3706 | mem_err:
|
---|
| 3707 | r = ONIGERR_MEMORY;
|
---|
| 3708 |
|
---|
| 3709 | err:
|
---|
| 3710 | onig_node_free(top_root);
|
---|
| 3711 | return r;
|
---|
| 3712 | }
|
---|
| 3713 |
|
---|
| 3714 |
|
---|
| 3715 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 3716 |
|
---|
/* A repeat whose variable count reaches this value is treated as "big". */
#define CEC_THRES_NUM_BIG_REPEAT         512
/* Variable count used for a repeat without an upper bound. */
#define CEC_INFINITE_NUM          0x7fffffff

/* State bits passed to / returned from setup_comb_exp_check(). */
#define CEC_IN_INFINITE_REPEAT    (1 << 0)
#define CEC_IN_FINITE_REPEAT      (1 << 1)
#define CEC_CONT_BIG_REPEAT       (1 << 2)
|
---|
| 3723 |
|
---|
| 3724 | static int
|
---|
| 3725 | setup_comb_exp_check(Node* node, int state, ScanEnv* env)
|
---|
| 3726 | {
|
---|
| 3727 | int type;
|
---|
| 3728 | int r = state;
|
---|
| 3729 |
|
---|
| 3730 | type = NTYPE(node);
|
---|
| 3731 | switch (type) {
|
---|
| 3732 | case NT_LIST:
|
---|
| 3733 | {
|
---|
| 3734 | Node* prev = NULL_NODE;
|
---|
| 3735 | do {
|
---|
| 3736 | r = setup_comb_exp_check(NCAR(node), r, env);
|
---|
| 3737 | prev = NCAR(node);
|
---|
| 3738 | } while (r >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3739 | }
|
---|
| 3740 | break;
|
---|
| 3741 |
|
---|
| 3742 | case NT_ALT:
|
---|
| 3743 | {
|
---|
| 3744 | int ret;
|
---|
| 3745 | do {
|
---|
| 3746 | ret = setup_comb_exp_check(NCAR(node), state, env);
|
---|
| 3747 | r |= ret;
|
---|
| 3748 | } while (ret >= 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3749 | }
|
---|
| 3750 | break;
|
---|
| 3751 |
|
---|
| 3752 | case NT_QTFR:
|
---|
| 3753 | {
|
---|
| 3754 | int child_state = state;
|
---|
| 3755 | int add_state = 0;
|
---|
| 3756 | QtfrNode* qn = NQTFR(node);
|
---|
| 3757 | Node* target = qn->target;
|
---|
| 3758 | int var_num;
|
---|
| 3759 |
|
---|
| 3760 | if (! IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 3761 | if (qn->upper > 1) {
|
---|
| 3762 | /* {0,1}, {1,1} are allowed */
|
---|
| 3763 | child_state |= CEC_IN_FINITE_REPEAT;
|
---|
| 3764 |
|
---|
| 3765 | /* check (a*){n,m}, (a+){n,m} => (a*){n,n}, (a+){n,n} */
|
---|
| 3766 | if (env->backrefed_mem == 0) {
|
---|
| 3767 | if (NTYPE(qn->target) == NT_ENCLOSE) {
|
---|
| 3768 | EncloseNode* en = NENCLOSE(qn->target);
|
---|
| 3769 | if (en->type == ENCLOSE_MEMORY) {
|
---|
| 3770 | if (NTYPE(en->target) == NT_QTFR) {
|
---|
| 3771 | QtfrNode* q = NQTFR(en->target);
|
---|
| 3772 | if (IS_REPEAT_INFINITE(q->upper)
|
---|
| 3773 | && q->greedy == qn->greedy) {
|
---|
| 3774 | qn->upper = (qn->lower == 0 ? 1 : qn->lower);
|
---|
| 3775 | if (qn->upper == 1)
|
---|
| 3776 | child_state = state;
|
---|
| 3777 | }
|
---|
| 3778 | }
|
---|
| 3779 | }
|
---|
| 3780 | }
|
---|
| 3781 | }
|
---|
| 3782 | }
|
---|
| 3783 | }
|
---|
| 3784 |
|
---|
| 3785 | if (state & CEC_IN_FINITE_REPEAT) {
|
---|
| 3786 | qn->comb_exp_check_num = -1;
|
---|
| 3787 | }
|
---|
| 3788 | else {
|
---|
| 3789 | if (IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 3790 | var_num = CEC_INFINITE_NUM;
|
---|
| 3791 | child_state |= CEC_IN_INFINITE_REPEAT;
|
---|
| 3792 | }
|
---|
| 3793 | else {
|
---|
| 3794 | var_num = qn->upper - qn->lower;
|
---|
| 3795 | }
|
---|
| 3796 |
|
---|
| 3797 | if (var_num >= CEC_THRES_NUM_BIG_REPEAT)
|
---|
| 3798 | add_state |= CEC_CONT_BIG_REPEAT;
|
---|
| 3799 |
|
---|
| 3800 | if (((state & CEC_IN_INFINITE_REPEAT) != 0 && var_num != 0) ||
|
---|
| 3801 | ((state & CEC_CONT_BIG_REPEAT) != 0 &&
|
---|
| 3802 | var_num >= CEC_THRES_NUM_BIG_REPEAT)) {
|
---|
| 3803 | if (qn->comb_exp_check_num == 0) {
|
---|
| 3804 | env->num_comb_exp_check++;
|
---|
| 3805 | qn->comb_exp_check_num = env->num_comb_exp_check;
|
---|
| 3806 | if (env->curr_max_regnum > env->comb_exp_max_regnum)
|
---|
| 3807 | env->comb_exp_max_regnum = env->curr_max_regnum;
|
---|
| 3808 | }
|
---|
| 3809 | }
|
---|
| 3810 | }
|
---|
| 3811 |
|
---|
| 3812 | r = setup_comb_exp_check(target, child_state, env);
|
---|
| 3813 | r |= add_state;
|
---|
| 3814 | }
|
---|
| 3815 | break;
|
---|
| 3816 |
|
---|
| 3817 | case NT_ENCLOSE:
|
---|
| 3818 | {
|
---|
| 3819 | EncloseNode* en = NENCLOSE(node);
|
---|
| 3820 |
|
---|
| 3821 | switch (en->type) {
|
---|
| 3822 | case ENCLOSE_MEMORY:
|
---|
| 3823 | {
|
---|
| 3824 | if (env->curr_max_regnum < en->regnum)
|
---|
| 3825 | env->curr_max_regnum = en->regnum;
|
---|
| 3826 |
|
---|
| 3827 | r = setup_comb_exp_check(en->target, state, env);
|
---|
| 3828 | }
|
---|
| 3829 | break;
|
---|
| 3830 |
|
---|
| 3831 | default:
|
---|
| 3832 | r = setup_comb_exp_check(en->target, state, env);
|
---|
| 3833 | break;
|
---|
| 3834 | }
|
---|
| 3835 | }
|
---|
| 3836 | break;
|
---|
| 3837 |
|
---|
| 3838 | #ifdef USE_SUBEXP_CALL
|
---|
| 3839 | case NT_CALL:
|
---|
| 3840 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
| 3841 | env->has_recursion = 1;
|
---|
| 3842 | else
|
---|
| 3843 | r = setup_comb_exp_check(NCALL(node)->target, state, env);
|
---|
| 3844 | break;
|
---|
| 3845 | #endif
|
---|
| 3846 |
|
---|
| 3847 | default:
|
---|
| 3848 | break;
|
---|
| 3849 | }
|
---|
| 3850 |
|
---|
| 3851 | return r;
|
---|
| 3852 | }
|
---|
| 3853 | #endif
|
---|
| 3854 |
|
---|
/* Context bits for the `state` argument of setup_tree(). */
#define IN_ALT          (1 << 0)
#define IN_NOT          (1 << 1)
#define IN_REPEAT       (1 << 2)
#define IN_VAR_REPEAT   (1 << 3)
#define IN_ROOT         (1 << 4)
|
---|
| 3860 |
|
---|
| 3861 | /* setup_tree does the following work.
|
---|
| 3862 | 1. check empty loop. (set qn->target_empty_info)
|
---|
| 3863 | 2. expand ignore-case in char class.
|
---|
| 3864 | 3. set memory status bit flags. (reg->mem_stats)
|
---|
| 3865 | 4. set qn->head_exact for [push, exact] -> [push_or_jump_exact1, exact].
|
---|
| 3866 | 5. find invalid patterns in look-behind.
|
---|
| 3867 | 6. expand repeated string.
|
---|
| 3868 | */
|
---|
| 3869 | static int
|
---|
| 3870 | setup_tree(Node* node, regex_t* reg, int state, ScanEnv* env)
|
---|
| 3871 | {
|
---|
| 3872 | int type;
|
---|
| 3873 | int r = 0;
|
---|
| 3874 | int in_root = state & IN_ROOT;
|
---|
| 3875 |
|
---|
| 3876 | state &= ~IN_ROOT;
|
---|
| 3877 | restart:
|
---|
| 3878 | type = NTYPE(node);
|
---|
| 3879 | switch (type) {
|
---|
| 3880 | case NT_LIST:
|
---|
| 3881 | {
|
---|
| 3882 | Node* prev = NULL_NODE;
|
---|
| 3883 | int prev_in_root = 0;
|
---|
| 3884 | state |= in_root;
|
---|
| 3885 | do {
|
---|
| 3886 | r = setup_tree(NCAR(node), reg, state, env);
|
---|
| 3887 | if (IS_NOT_NULL(prev) && r == 0) {
|
---|
| 3888 | r = next_setup(prev, NCAR(node), prev_in_root, reg);
|
---|
| 3889 | }
|
---|
| 3890 | prev = NCAR(node);
|
---|
| 3891 | prev_in_root = state & IN_ROOT;
|
---|
| 3892 | state &= ~IN_ROOT;
|
---|
| 3893 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3894 | }
|
---|
| 3895 | break;
|
---|
| 3896 |
|
---|
| 3897 | case NT_ALT:
|
---|
| 3898 | do {
|
---|
| 3899 | r = setup_tree(NCAR(node), reg, (state | IN_ALT), env);
|
---|
| 3900 | } while (r == 0 && IS_NOT_NULL(node = NCDR(node)));
|
---|
| 3901 | break;
|
---|
| 3902 |
|
---|
| 3903 | case NT_CCLASS:
|
---|
| 3904 | break;
|
---|
| 3905 |
|
---|
| 3906 | case NT_STR:
|
---|
| 3907 | if (IS_IGNORECASE(reg->options) && !NSTRING_IS_RAW(node)) {
|
---|
| 3908 | r = expand_case_fold_string(node, reg);
|
---|
| 3909 | }
|
---|
| 3910 | break;
|
---|
| 3911 |
|
---|
| 3912 | case NT_CTYPE:
|
---|
| 3913 | case NT_CANY:
|
---|
| 3914 | break;
|
---|
| 3915 |
|
---|
| 3916 | #ifdef USE_SUBEXP_CALL
|
---|
| 3917 | case NT_CALL:
|
---|
| 3918 | break;
|
---|
| 3919 | #endif
|
---|
| 3920 |
|
---|
| 3921 | case NT_BREF:
|
---|
| 3922 | {
|
---|
| 3923 | int i;
|
---|
| 3924 | int* p;
|
---|
| 3925 | Node** nodes = SCANENV_MEM_NODES(env);
|
---|
| 3926 | BRefNode* br = NBREF(node);
|
---|
| 3927 | p = BACKREFS_P(br);
|
---|
| 3928 | for (i = 0; i < br->back_num; i++) {
|
---|
| 3929 | if (p[i] > env->num_mem) return ONIGERR_INVALID_BACKREF;
|
---|
| 3930 | BIT_STATUS_ON_AT(env->backrefed_mem, p[i]);
|
---|
| 3931 | BIT_STATUS_ON_AT(env->bt_mem_start, p[i]);
|
---|
| 3932 | #ifdef USE_BACKREF_WITH_LEVEL
|
---|
| 3933 | if (IS_BACKREF_NEST_LEVEL(br)) {
|
---|
| 3934 | BIT_STATUS_ON_AT(env->bt_mem_end, p[i]);
|
---|
| 3935 | }
|
---|
| 3936 | #endif
|
---|
| 3937 | SET_ENCLOSE_STATUS(nodes[p[i]], NST_MEM_BACKREFED);
|
---|
| 3938 | }
|
---|
| 3939 | }
|
---|
| 3940 | break;
|
---|
| 3941 |
|
---|
| 3942 | case NT_QTFR:
|
---|
| 3943 | {
|
---|
| 3944 | OnigDistance d;
|
---|
| 3945 | QtfrNode* qn = NQTFR(node);
|
---|
| 3946 | Node* target = qn->target;
|
---|
| 3947 |
|
---|
| 3948 | if ((state & IN_REPEAT) != 0) {
|
---|
| 3949 | qn->state |= NST_IN_REPEAT;
|
---|
| 3950 | }
|
---|
| 3951 |
|
---|
| 3952 | if (IS_REPEAT_INFINITE(qn->upper) || qn->upper >= 1) {
|
---|
| 3953 | r = get_min_match_length(target, &d, env);
|
---|
| 3954 | if (r) break;
|
---|
| 3955 | if (d == 0) {
|
---|
| 3956 | qn->target_empty_info = NQ_TARGET_IS_EMPTY;
|
---|
| 3957 | #ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
|
---|
| 3958 | r = quantifiers_memory_node_info(target);
|
---|
| 3959 | if (r < 0) break;
|
---|
| 3960 | if (r > 0) {
|
---|
| 3961 | qn->target_empty_info = r;
|
---|
| 3962 | }
|
---|
| 3963 | #endif
|
---|
| 3964 | #if 0
|
---|
| 3965 | r = get_max_match_length(target, &d, env);
|
---|
| 3966 | if (r == 0 && d == 0) {
|
---|
| 3967 | /* ()* ==> ()?, ()+ ==> () */
|
---|
| 3968 | qn->upper = 1;
|
---|
| 3969 | if (qn->lower > 1) qn->lower = 1;
|
---|
| 3970 | if (NTYPE(target) == NT_STR) {
|
---|
| 3971 | qn->upper = qn->lower = 0; /* /(?:)+/ ==> // */
|
---|
| 3972 | }
|
---|
| 3973 | }
|
---|
| 3974 | #endif
|
---|
| 3975 | }
|
---|
| 3976 | }
|
---|
| 3977 |
|
---|
| 3978 | state |= IN_REPEAT;
|
---|
| 3979 | if (qn->lower != qn->upper)
|
---|
| 3980 | state |= IN_VAR_REPEAT;
|
---|
| 3981 | r = setup_tree(target, reg, state, env);
|
---|
| 3982 | if (r) break;
|
---|
| 3983 |
|
---|
| 3984 | /* expand string */
|
---|
| 3985 | #define EXPAND_STRING_MAX_LENGTH 100
|
---|
| 3986 | if (NTYPE(target) == NT_STR) {
|
---|
| 3987 | if (qn->lower > 1) {
|
---|
| 3988 | int i, n = qn->lower;
|
---|
| 3989 | OnigDistance len = NSTRING_LEN(target);
|
---|
| 3990 | StrNode* sn = NSTR(target);
|
---|
| 3991 | Node* np;
|
---|
| 3992 |
|
---|
| 3993 | np = onig_node_new_str(sn->s, sn->end);
|
---|
| 3994 | if (IS_NULL(np)) return ONIGERR_MEMORY;
|
---|
| 3995 | NSTR(np)->flag = sn->flag;
|
---|
| 3996 |
|
---|
| 3997 | for (i = 1; i < n && (i+1) * len <= EXPAND_STRING_MAX_LENGTH; i++) {
|
---|
| 3998 | r = onig_node_str_cat(np, sn->s, sn->end);
|
---|
| 3999 | if (r) {
|
---|
| 4000 | onig_node_free(np);
|
---|
| 4001 | return r;
|
---|
| 4002 | }
|
---|
| 4003 | }
|
---|
| 4004 | if (i < qn->upper || IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 4005 | Node *np1, *np2;
|
---|
| 4006 |
|
---|
| 4007 | qn->lower -= i;
|
---|
| 4008 | if (! IS_REPEAT_INFINITE(qn->upper))
|
---|
| 4009 | qn->upper -= i;
|
---|
| 4010 |
|
---|
| 4011 | np1 = onig_node_new_list(np, NULL);
|
---|
| 4012 | if (IS_NULL(np1)) {
|
---|
| 4013 | onig_node_free(np);
|
---|
| 4014 | return ONIGERR_MEMORY;
|
---|
| 4015 | }
|
---|
| 4016 | swap_node(np1, node);
|
---|
| 4017 | np2 = onig_node_list_add(node, np1);
|
---|
| 4018 | if (IS_NULL(np2)) {
|
---|
| 4019 | onig_node_free(np1);
|
---|
| 4020 | return ONIGERR_MEMORY;
|
---|
| 4021 | }
|
---|
| 4022 | }
|
---|
| 4023 | else {
|
---|
| 4024 | swap_node(np, node);
|
---|
| 4025 | onig_node_free(np);
|
---|
| 4026 | }
|
---|
| 4027 | break; /* break case NT_QTFR: */
|
---|
| 4028 | }
|
---|
| 4029 | }
|
---|
| 4030 |
|
---|
| 4031 | #ifdef USE_OP_PUSH_OR_JUMP_EXACT
|
---|
| 4032 | if (qn->greedy && (qn->target_empty_info != 0)) {
|
---|
| 4033 | if (NTYPE(target) == NT_QTFR) {
|
---|
| 4034 | QtfrNode* tqn = NQTFR(target);
|
---|
| 4035 | if (IS_NOT_NULL(tqn->head_exact)) {
|
---|
| 4036 | qn->head_exact = tqn->head_exact;
|
---|
| 4037 | tqn->head_exact = NULL;
|
---|
| 4038 | }
|
---|
| 4039 | }
|
---|
| 4040 | else {
|
---|
| 4041 | qn->head_exact = get_head_value_node(qn->target, 1, reg);
|
---|
| 4042 | }
|
---|
| 4043 | }
|
---|
| 4044 | #endif
|
---|
| 4045 | }
|
---|
| 4046 | break;
|
---|
| 4047 |
|
---|
| 4048 | case NT_ENCLOSE:
|
---|
| 4049 | {
|
---|
| 4050 | EncloseNode* en = NENCLOSE(node);
|
---|
| 4051 |
|
---|
| 4052 | switch (en->type) {
|
---|
| 4053 | case ENCLOSE_OPTION:
|
---|
| 4054 | {
|
---|
| 4055 | OnigOptionType options = reg->options;
|
---|
| 4056 | state |= in_root;
|
---|
| 4057 | reg->options = NENCLOSE(node)->option;
|
---|
| 4058 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
| 4059 | reg->options = options;
|
---|
| 4060 | }
|
---|
| 4061 | break;
|
---|
| 4062 |
|
---|
| 4063 | case ENCLOSE_MEMORY:
|
---|
| 4064 | if ((state & (IN_ALT | IN_NOT | IN_VAR_REPEAT)) != 0) {
|
---|
| 4065 | BIT_STATUS_ON_AT(env->bt_mem_start, en->regnum);
|
---|
| 4066 | /* SET_ENCLOSE_STATUS(node, NST_MEM_IN_ALT_NOT); */
|
---|
| 4067 | }
|
---|
| 4068 | r = setup_tree(en->target, reg, state, env);
|
---|
| 4069 | break;
|
---|
| 4070 |
|
---|
| 4071 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 4072 | {
|
---|
| 4073 | Node* target = en->target;
|
---|
| 4074 | r = setup_tree(target, reg, state, env);
|
---|
| 4075 | if (NTYPE(target) == NT_QTFR) {
|
---|
| 4076 | QtfrNode* tqn = NQTFR(target);
|
---|
| 4077 | if (IS_REPEAT_INFINITE(tqn->upper) && tqn->lower <= 1 &&
|
---|
| 4078 | tqn->greedy != 0) { /* (?>a*), a*+ etc... */
|
---|
| 4079 | int qtype = NTYPE(tqn->target);
|
---|
| 4080 | if (IS_NODE_TYPE_SIMPLE(qtype))
|
---|
| 4081 | SET_ENCLOSE_STATUS(node, NST_STOP_BT_SIMPLE_REPEAT);
|
---|
| 4082 | }
|
---|
| 4083 | }
|
---|
| 4084 | }
|
---|
| 4085 | break;
|
---|
| 4086 |
|
---|
| 4087 | case ENCLOSE_CONDITION:
|
---|
| 4088 | #ifdef USE_NAMED_GROUP
|
---|
| 4089 | if (! IS_ENCLOSE_NAME_REF(NENCLOSE(node)) &&
|
---|
| 4090 | env->num_named > 0 &&
|
---|
| 4091 | IS_SYNTAX_BV(env->syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
| 4092 | !ONIG_IS_OPTION_ON(env->option, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
| 4093 | return ONIGERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED;
|
---|
| 4094 | }
|
---|
| 4095 | #endif
|
---|
| 4096 | r = setup_tree(NENCLOSE(node)->target, reg, state, env);
|
---|
| 4097 | break;
|
---|
| 4098 | }
|
---|
| 4099 | }
|
---|
| 4100 | break;
|
---|
| 4101 |
|
---|
| 4102 | case NT_ANCHOR:
|
---|
| 4103 | {
|
---|
| 4104 | AnchorNode* an = NANCHOR(node);
|
---|
| 4105 |
|
---|
| 4106 | switch (an->type) {
|
---|
| 4107 | case ANCHOR_PREC_READ:
|
---|
| 4108 | r = setup_tree(an->target, reg, state, env);
|
---|
| 4109 | break;
|
---|
| 4110 | case ANCHOR_PREC_READ_NOT:
|
---|
| 4111 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
| 4112 | break;
|
---|
| 4113 |
|
---|
| 4114 | /* allowed node types in look-behind */
|
---|
| 4115 | #define ALLOWED_TYPE_IN_LB \
|
---|
| 4116 | ( BIT_NT_LIST | BIT_NT_ALT | BIT_NT_STR | BIT_NT_CCLASS | BIT_NT_CTYPE | \
|
---|
| 4117 | BIT_NT_CANY | BIT_NT_ANCHOR | BIT_NT_ENCLOSE | BIT_NT_QTFR | BIT_NT_CALL )
|
---|
| 4118 |
|
---|
| 4119 | #define ALLOWED_ENCLOSE_IN_LB ( ENCLOSE_MEMORY | ENCLOSE_OPTION )
|
---|
| 4120 | #define ALLOWED_ENCLOSE_IN_LB_NOT ENCLOSE_OPTION
|
---|
| 4121 |
|
---|
| 4122 | #define ALLOWED_ANCHOR_IN_LB \
|
---|
| 4123 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
| 4124 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
| 4125 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
| 4126 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
| 4127 | #define ALLOWED_ANCHOR_IN_LB_NOT \
|
---|
| 4128 | ( ANCHOR_LOOK_BEHIND | ANCHOR_LOOK_BEHIND_NOT | ANCHOR_BEGIN_LINE | \
|
---|
| 4129 | ANCHOR_END_LINE | ANCHOR_BEGIN_BUF | ANCHOR_BEGIN_POSITION | ANCHOR_KEEP | \
|
---|
| 4130 | ANCHOR_WORD_BOUND | ANCHOR_NOT_WORD_BOUND | \
|
---|
| 4131 | ANCHOR_WORD_BEGIN | ANCHOR_WORD_END )
|
---|
| 4132 |
|
---|
| 4133 | case ANCHOR_LOOK_BEHIND:
|
---|
| 4134 | {
|
---|
| 4135 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
| 4136 | ALLOWED_ENCLOSE_IN_LB, ALLOWED_ANCHOR_IN_LB);
|
---|
| 4137 | if (r < 0) return r;
|
---|
| 4138 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 4139 | r = setup_look_behind(node, reg, env);
|
---|
| 4140 | if (r != 0) return r;
|
---|
| 4141 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
| 4142 | r = setup_tree(an->target, reg, state, env);
|
---|
| 4143 | }
|
---|
| 4144 | break;
|
---|
| 4145 |
|
---|
| 4146 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 4147 | {
|
---|
| 4148 | r = check_type_tree(an->target, ALLOWED_TYPE_IN_LB,
|
---|
| 4149 | ALLOWED_ENCLOSE_IN_LB_NOT, ALLOWED_ANCHOR_IN_LB_NOT);
|
---|
| 4150 | if (r < 0) return r;
|
---|
| 4151 | if (r > 0) return ONIGERR_INVALID_LOOK_BEHIND_PATTERN;
|
---|
| 4152 | r = setup_look_behind(node, reg, env);
|
---|
| 4153 | if (r != 0) return r;
|
---|
| 4154 | if (NTYPE(node) != NT_ANCHOR) goto restart;
|
---|
| 4155 | r = setup_tree(an->target, reg, (state | IN_NOT), env);
|
---|
| 4156 | }
|
---|
| 4157 | break;
|
---|
| 4158 | }
|
---|
| 4159 | }
|
---|
| 4160 | break;
|
---|
| 4161 |
|
---|
| 4162 | default:
|
---|
| 4163 | break;
|
---|
| 4164 | }
|
---|
| 4165 |
|
---|
| 4166 | return r;
|
---|
| 4167 | }
|
---|
| 4168 |
|
---|
| 4169 | #ifndef USE_SUNDAY_QUICK_SEARCH
|
---|
| 4170 | /* set skip map for Boyer-Moore search */
|
---|
| 4171 | static int
|
---|
| 4172 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
| 4173 | UChar skip[], int** int_skip, int ignore_case)
|
---|
| 4174 | {
|
---|
| 4175 | OnigDistance i, len;
|
---|
| 4176 | int clen, flen, n, j, k;
|
---|
| 4177 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
| 4178 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 4179 | OnigEncoding enc = reg->enc;
|
---|
| 4180 |
|
---|
| 4181 | len = end - s;
|
---|
| 4182 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
| 4183 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )len;
|
---|
| 4184 |
|
---|
| 4185 | n = 0;
|
---|
| 4186 | for (i = 0; i < len - 1; i += clen) {
|
---|
| 4187 | p = s + i;
|
---|
| 4188 | if (ignore_case)
|
---|
| 4189 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4190 | p, end, items);
|
---|
| 4191 | clen = enclen(enc, p);
|
---|
| 4192 |
|
---|
| 4193 | for (j = 0; j < n; j++) {
|
---|
| 4194 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4195 | return 1; /* different length isn't supported. */
|
---|
| 4196 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4197 | if (flen != clen)
|
---|
| 4198 | return 1; /* different length isn't supported. */
|
---|
| 4199 | }
|
---|
| 4200 | for (j = 0; j < clen; j++) {
|
---|
| 4201 | skip[s[i + j]] = (UChar )(len - 1 - i - j);
|
---|
| 4202 | for (k = 0; k < n; k++) {
|
---|
| 4203 | skip[buf[k][j]] = (UChar )(len - 1 - i - j);
|
---|
| 4204 | }
|
---|
| 4205 | }
|
---|
| 4206 | }
|
---|
| 4207 | }
|
---|
| 4208 | else {
|
---|
| 4209 | if (IS_NULL(*int_skip)) {
|
---|
| 4210 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
| 4211 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
| 4212 | }
|
---|
| 4213 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )len;
|
---|
| 4214 |
|
---|
| 4215 | n = 0;
|
---|
| 4216 | for (i = 0; i < len - 1; i += clen) {
|
---|
| 4217 | p = s + i;
|
---|
| 4218 | if (ignore_case)
|
---|
| 4219 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4220 | p, end, items);
|
---|
| 4221 | clen = enclen(enc, p);
|
---|
| 4222 |
|
---|
| 4223 | for (j = 0; j < n; j++) {
|
---|
| 4224 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4225 | return 1; /* different length isn't supported. */
|
---|
| 4226 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4227 | if (flen != clen)
|
---|
| 4228 | return 1; /* different length isn't supported. */
|
---|
| 4229 | }
|
---|
| 4230 | for (j = 0; j < clen; j++) {
|
---|
| 4231 | (*int_skip)[s[i + j]] = (int )(len - 1 - i - j);
|
---|
| 4232 | for (k = 0; k < n; k++) {
|
---|
| 4233 | (*int_skip)[buf[k][j]] = (int )(len - 1 - i - j);
|
---|
| 4234 | }
|
---|
| 4235 | }
|
---|
| 4236 | }
|
---|
| 4237 | }
|
---|
| 4238 | return 0;
|
---|
| 4239 | }
|
---|
| 4240 |
|
---|
| 4241 | #else /* USE_SUNDAY_QUICK_SEARCH */
|
---|
| 4242 |
|
---|
| 4243 | /* set skip map for Sunday's quick search */
|
---|
| 4244 | static int
|
---|
| 4245 | set_bm_skip(UChar* s, UChar* end, regex_t* reg,
|
---|
| 4246 | UChar skip[], int** int_skip, int ignore_case)
|
---|
| 4247 | {
|
---|
| 4248 | OnigDistance i, len;
|
---|
| 4249 | int clen, flen, n, j, k;
|
---|
| 4250 | UChar *p, buf[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM][ONIGENC_MBC_CASE_FOLD_MAXLEN];
|
---|
| 4251 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 4252 | OnigEncoding enc = reg->enc;
|
---|
| 4253 |
|
---|
| 4254 | len = end - s;
|
---|
| 4255 | if (len < ONIG_CHAR_TABLE_SIZE) {
|
---|
| 4256 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) skip[i] = (UChar )(len + 1);
|
---|
| 4257 |
|
---|
| 4258 | n = 0;
|
---|
| 4259 | for (i = 0; i < len; i += clen) {
|
---|
| 4260 | p = s + i;
|
---|
| 4261 | if (ignore_case)
|
---|
| 4262 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4263 | p, end, items);
|
---|
| 4264 | clen = enclen(enc, p);
|
---|
| 4265 |
|
---|
| 4266 | for (j = 0; j < n; j++) {
|
---|
| 4267 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4268 | return 1; /* different length isn't supported. */
|
---|
| 4269 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4270 | if (flen != clen)
|
---|
| 4271 | return 1; /* different length isn't supported. */
|
---|
| 4272 | }
|
---|
| 4273 | for (j = 0; j < clen; j++) {
|
---|
| 4274 | skip[s[i + j]] = (UChar )(len - i - j);
|
---|
| 4275 | for (k = 0; k < n; k++) {
|
---|
| 4276 | skip[buf[k][j]] = (UChar )(len - i - j);
|
---|
| 4277 | }
|
---|
| 4278 | }
|
---|
| 4279 | }
|
---|
| 4280 | }
|
---|
| 4281 | else {
|
---|
| 4282 | if (IS_NULL(*int_skip)) {
|
---|
| 4283 | *int_skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
|
---|
| 4284 | if (IS_NULL(*int_skip)) return ONIGERR_MEMORY;
|
---|
| 4285 | }
|
---|
| 4286 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) (*int_skip)[i] = (int )(len + 1);
|
---|
| 4287 |
|
---|
| 4288 | n = 0;
|
---|
| 4289 | for (i = 0; i < len; i += clen) {
|
---|
| 4290 | p = s + i;
|
---|
| 4291 | if (ignore_case)
|
---|
| 4292 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, reg->case_fold_flag,
|
---|
| 4293 | p, end, items);
|
---|
| 4294 | clen = enclen(enc, p);
|
---|
| 4295 |
|
---|
| 4296 | for (j = 0; j < n; j++) {
|
---|
| 4297 | if ((items[j].code_len != 1) || (items[j].byte_len != clen))
|
---|
| 4298 | return 1; /* different length isn't supported. */
|
---|
| 4299 | flen = ONIGENC_CODE_TO_MBC(enc, items[j].code[0], buf[j]);
|
---|
| 4300 | if (flen != clen)
|
---|
| 4301 | return 1; /* different length isn't supported. */
|
---|
| 4302 | }
|
---|
| 4303 | for (j = 0; j < clen; j++) {
|
---|
| 4304 | (*int_skip)[s[i + j]] = (int )(len - i - j);
|
---|
| 4305 | for (k = 0; k < n; k++) {
|
---|
| 4306 | (*int_skip)[buf[k][j]] = (int )(len - i - j);
|
---|
| 4307 | }
|
---|
| 4308 | }
|
---|
| 4309 | }
|
---|
| 4310 | }
|
---|
| 4311 | return 0;
|
---|
| 4312 | }
|
---|
| 4313 | #endif /* USE_SUNDAY_QUICK_SEARCH */
|
---|
| 4314 |
|
---|
/* Size in bytes of the OptExactInfo.s buffer. */
#define OPT_EXACT_MAXLEN   24
|
---|
| 4316 |
|
---|
| 4317 | typedef struct {
|
---|
| 4318 | OnigDistance min; /* min byte length */
|
---|
| 4319 | OnigDistance max; /* max byte length */
|
---|
| 4320 | } MinMaxLen;
|
---|
| 4321 |
|
---|
| 4322 | typedef struct {
|
---|
| 4323 | MinMaxLen mmd;
|
---|
| 4324 | OnigEncoding enc;
|
---|
| 4325 | OnigOptionType options;
|
---|
| 4326 | OnigCaseFoldType case_fold_flag;
|
---|
| 4327 | ScanEnv* scan_env;
|
---|
| 4328 | } OptEnv;
|
---|
| 4329 |
|
---|
/* Anchor flag sets, kept separately for the left and the right side. */
typedef struct {
  int left_anchor;   /* ANCHOR_* bits classified as "left" */
  int right_anchor;  /* ANCHOR_* bits classified as "right" */
} OptAncInfo;
|
---|
| 4334 |
|
---|
| 4335 | typedef struct {
|
---|
| 4336 | MinMaxLen mmd; /* info position */
|
---|
| 4337 | OptAncInfo anc;
|
---|
| 4338 |
|
---|
| 4339 | int reach_end;
|
---|
| 4340 | int ignore_case; /* -1: unset, 0: case sensitive, 1: ignore case */
|
---|
| 4341 | int len;
|
---|
| 4342 | UChar s[OPT_EXACT_MAXLEN];
|
---|
| 4343 | } OptExactInfo;
|
---|
| 4344 |
|
---|
| 4345 | typedef struct {
|
---|
| 4346 | MinMaxLen mmd; /* info position */
|
---|
| 4347 | OptAncInfo anc;
|
---|
| 4348 |
|
---|
| 4349 | int value; /* weighted value */
|
---|
| 4350 | UChar map[ONIG_CHAR_TABLE_SIZE];
|
---|
| 4351 | } OptMapInfo;
|
---|
| 4352 |
|
---|
| 4353 | typedef struct {
|
---|
| 4354 | MinMaxLen len;
|
---|
| 4355 |
|
---|
| 4356 | OptAncInfo anc;
|
---|
| 4357 | OptExactInfo exb; /* boundary */
|
---|
| 4358 | OptExactInfo exm; /* middle */
|
---|
| 4359 | OptExactInfo expr; /* prec read (?=...) */
|
---|
| 4360 |
|
---|
| 4361 | OptMapInfo map; /* boundary */
|
---|
| 4362 | } NodeOptInfo;
|
---|
| 4363 |
|
---|
| 4364 |
|
---|
| 4365 | static int
|
---|
| 4366 | map_position_value(OnigEncoding enc, int i)
|
---|
| 4367 | {
|
---|
| 4368 | static const short int ByteValTable[] = {
|
---|
| 4369 | 5, 1, 1, 1, 1, 1, 1, 1, 1, 10, 10, 1, 1, 10, 1, 1,
|
---|
| 4370 | 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
---|
| 4371 | 12, 4, 7, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5,
|
---|
| 4372 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5,
|
---|
| 4373 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
| 4374 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 6, 5, 5, 5,
|
---|
| 4375 | 5, 6, 6, 6, 6, 7, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
|
---|
| 4376 | 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 1
|
---|
| 4377 | };
|
---|
| 4378 |
|
---|
| 4379 | if (i < numberof(ByteValTable)) {
|
---|
| 4380 | if (i == 0 && ONIGENC_MBC_MINLEN(enc) > 1)
|
---|
| 4381 | return 20;
|
---|
| 4382 | else
|
---|
| 4383 | return (int )ByteValTable[i];
|
---|
| 4384 | }
|
---|
| 4385 | else
|
---|
| 4386 | return 4; /* Take it easy. */
|
---|
| 4387 | }
|
---|
| 4388 |
|
---|
| 4389 | static int
|
---|
| 4390 | distance_value(MinMaxLen* mm)
|
---|
| 4391 | {
|
---|
| 4392 | /* 1000 / (min-max-dist + 1) */
|
---|
| 4393 | static const short int dist_vals[] = {
|
---|
| 4394 | 1000, 500, 333, 250, 200, 167, 143, 125, 111, 100,
|
---|
| 4395 | 91, 83, 77, 71, 67, 63, 59, 56, 53, 50,
|
---|
| 4396 | 48, 45, 43, 42, 40, 38, 37, 36, 34, 33,
|
---|
| 4397 | 32, 31, 30, 29, 29, 28, 27, 26, 26, 25,
|
---|
| 4398 | 24, 24, 23, 23, 22, 22, 21, 21, 20, 20,
|
---|
| 4399 | 20, 19, 19, 19, 18, 18, 18, 17, 17, 17,
|
---|
| 4400 | 16, 16, 16, 16, 15, 15, 15, 15, 14, 14,
|
---|
| 4401 | 14, 14, 14, 14, 13, 13, 13, 13, 13, 13,
|
---|
| 4402 | 12, 12, 12, 12, 12, 12, 11, 11, 11, 11,
|
---|
| 4403 | 11, 11, 11, 11, 11, 10, 10, 10, 10, 10
|
---|
| 4404 | };
|
---|
| 4405 |
|
---|
| 4406 | OnigDistance d;
|
---|
| 4407 |
|
---|
| 4408 | if (mm->max == ONIG_INFINITE_DISTANCE) return 0;
|
---|
| 4409 |
|
---|
| 4410 | d = mm->max - mm->min;
|
---|
| 4411 | if (d < numberof(dist_vals))
|
---|
| 4412 | /* return dist_vals[d] * 16 / (mm->min + 12); */
|
---|
| 4413 | return (int )dist_vals[d];
|
---|
| 4414 | else
|
---|
| 4415 | return 1;
|
---|
| 4416 | }
|
---|
| 4417 |
|
---|
| 4418 | static int
|
---|
| 4419 | comp_distance_value(MinMaxLen* d1, MinMaxLen* d2, int v1, int v2)
|
---|
| 4420 | {
|
---|
| 4421 | if (v2 <= 0) return -1;
|
---|
| 4422 | if (v1 <= 0) return 1;
|
---|
| 4423 |
|
---|
| 4424 | v1 *= distance_value(d1);
|
---|
| 4425 | v2 *= distance_value(d2);
|
---|
| 4426 |
|
---|
| 4427 | if (v2 > v1) return 1;
|
---|
| 4428 | if (v2 < v1) return -1;
|
---|
| 4429 |
|
---|
| 4430 | if (d2->min < d1->min) return 1;
|
---|
| 4431 | if (d2->min > d1->min) return -1;
|
---|
| 4432 | return 0;
|
---|
| 4433 | }
|
---|
| 4434 |
|
---|
| 4435 | static int
|
---|
| 4436 | is_equal_mml(MinMaxLen* a, MinMaxLen* b)
|
---|
| 4437 | {
|
---|
| 4438 | return (a->min == b->min && a->max == b->max) ? 1 : 0;
|
---|
| 4439 | }
|
---|
| 4440 |
|
---|
| 4441 |
|
---|
| 4442 | static void
|
---|
| 4443 | set_mml(MinMaxLen* mml, OnigDistance min, OnigDistance max)
|
---|
| 4444 | {
|
---|
| 4445 | mml->min = min;
|
---|
| 4446 | mml->max = max;
|
---|
| 4447 | }
|
---|
| 4448 |
|
---|
| 4449 | static void
|
---|
| 4450 | clear_mml(MinMaxLen* mml)
|
---|
| 4451 | {
|
---|
| 4452 | mml->min = mml->max = 0;
|
---|
| 4453 | }
|
---|
| 4454 |
|
---|
| 4455 | static void
|
---|
| 4456 | copy_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
| 4457 | {
|
---|
| 4458 | to->min = from->min;
|
---|
| 4459 | to->max = from->max;
|
---|
| 4460 | }
|
---|
| 4461 |
|
---|
| 4462 | static void
|
---|
| 4463 | add_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
| 4464 | {
|
---|
| 4465 | to->min = distance_add(to->min, from->min);
|
---|
| 4466 | to->max = distance_add(to->max, from->max);
|
---|
| 4467 | }
|
---|
| 4468 |
|
---|
| 4469 | #if 0
|
---|
| 4470 | static void
|
---|
| 4471 | add_len_mml(MinMaxLen* to, OnigDistance len)
|
---|
| 4472 | {
|
---|
| 4473 | to->min = distance_add(to->min, len);
|
---|
| 4474 | to->max = distance_add(to->max, len);
|
---|
| 4475 | }
|
---|
| 4476 | #endif
|
---|
| 4477 |
|
---|
| 4478 | static void
|
---|
| 4479 | alt_merge_mml(MinMaxLen* to, MinMaxLen* from)
|
---|
| 4480 | {
|
---|
| 4481 | if (to->min > from->min) to->min = from->min;
|
---|
| 4482 | if (to->max < from->max) to->max = from->max;
|
---|
| 4483 | }
|
---|
| 4484 |
|
---|
| 4485 | static void
|
---|
| 4486 | copy_opt_env(OptEnv* to, OptEnv* from)
|
---|
| 4487 | {
|
---|
| 4488 | *to = *from;
|
---|
| 4489 | }
|
---|
| 4490 |
|
---|
| 4491 | static void
|
---|
| 4492 | clear_opt_anc_info(OptAncInfo* anc)
|
---|
| 4493 | {
|
---|
| 4494 | anc->left_anchor = 0;
|
---|
| 4495 | anc->right_anchor = 0;
|
---|
| 4496 | }
|
---|
| 4497 |
|
---|
| 4498 | static void
|
---|
| 4499 | copy_opt_anc_info(OptAncInfo* to, OptAncInfo* from)
|
---|
| 4500 | {
|
---|
| 4501 | *to = *from;
|
---|
| 4502 | }
|
---|
| 4503 |
|
---|
| 4504 | static void
|
---|
| 4505 | concat_opt_anc_info(OptAncInfo* to, OptAncInfo* left, OptAncInfo* right,
|
---|
| 4506 | OnigDistance left_len, OnigDistance right_len)
|
---|
| 4507 | {
|
---|
| 4508 | clear_opt_anc_info(to);
|
---|
| 4509 |
|
---|
| 4510 | to->left_anchor = left->left_anchor;
|
---|
| 4511 | if (left_len == 0) {
|
---|
| 4512 | to->left_anchor |= right->left_anchor;
|
---|
| 4513 | }
|
---|
| 4514 |
|
---|
| 4515 | to->right_anchor = right->right_anchor;
|
---|
| 4516 | if (right_len == 0) {
|
---|
| 4517 | to->right_anchor |= left->right_anchor;
|
---|
| 4518 | }
|
---|
| 4519 | else {
|
---|
| 4520 | to->right_anchor |= (left->right_anchor & ANCHOR_PREC_READ_NOT);
|
---|
| 4521 | }
|
---|
| 4522 | }
|
---|
| 4523 |
|
---|
| 4524 | static int
|
---|
| 4525 | is_left_anchor(int anc)
|
---|
| 4526 | {
|
---|
| 4527 | if (anc == ANCHOR_END_BUF || anc == ANCHOR_SEMI_END_BUF ||
|
---|
| 4528 | anc == ANCHOR_END_LINE || anc == ANCHOR_PREC_READ ||
|
---|
| 4529 | anc == ANCHOR_PREC_READ_NOT)
|
---|
| 4530 | return 0;
|
---|
| 4531 |
|
---|
| 4532 | return 1;
|
---|
| 4533 | }
|
---|
| 4534 |
|
---|
| 4535 | static int
|
---|
| 4536 | is_set_opt_anc_info(OptAncInfo* to, int anc)
|
---|
| 4537 | {
|
---|
| 4538 | if ((to->left_anchor & anc) != 0) return 1;
|
---|
| 4539 |
|
---|
| 4540 | return ((to->right_anchor & anc) != 0 ? 1 : 0);
|
---|
| 4541 | }
|
---|
| 4542 |
|
---|
| 4543 | static void
|
---|
| 4544 | add_opt_anc_info(OptAncInfo* to, int anc)
|
---|
| 4545 | {
|
---|
| 4546 | if (is_left_anchor(anc))
|
---|
| 4547 | to->left_anchor |= anc;
|
---|
| 4548 | else
|
---|
| 4549 | to->right_anchor |= anc;
|
---|
| 4550 | }
|
---|
| 4551 |
|
---|
| 4552 | static void
|
---|
| 4553 | remove_opt_anc_info(OptAncInfo* to, int anc)
|
---|
| 4554 | {
|
---|
| 4555 | if (is_left_anchor(anc))
|
---|
| 4556 | to->left_anchor &= ~anc;
|
---|
| 4557 | else
|
---|
| 4558 | to->right_anchor &= ~anc;
|
---|
| 4559 | }
|
---|
| 4560 |
|
---|
| 4561 | static void
|
---|
| 4562 | alt_merge_opt_anc_info(OptAncInfo* to, OptAncInfo* add)
|
---|
| 4563 | {
|
---|
| 4564 | to->left_anchor &= add->left_anchor;
|
---|
| 4565 | to->right_anchor &= add->right_anchor;
|
---|
| 4566 | }
|
---|
| 4567 |
|
---|
| 4568 | static int
|
---|
| 4569 | is_full_opt_exact_info(OptExactInfo* ex)
|
---|
| 4570 | {
|
---|
| 4571 | return (ex->len >= OPT_EXACT_MAXLEN ? 1 : 0);
|
---|
| 4572 | }
|
---|
| 4573 |
|
---|
| 4574 | static void
|
---|
| 4575 | clear_opt_exact_info(OptExactInfo* ex)
|
---|
| 4576 | {
|
---|
| 4577 | clear_mml(&ex->mmd);
|
---|
| 4578 | clear_opt_anc_info(&ex->anc);
|
---|
| 4579 | ex->reach_end = 0;
|
---|
| 4580 | ex->ignore_case = -1; /* unset */
|
---|
| 4581 | ex->len = 0;
|
---|
| 4582 | ex->s[0] = '\0';
|
---|
| 4583 | }
|
---|
| 4584 |
|
---|
| 4585 | static void
|
---|
| 4586 | copy_opt_exact_info(OptExactInfo* to, OptExactInfo* from)
|
---|
| 4587 | {
|
---|
| 4588 | *to = *from;
|
---|
| 4589 | }
|
---|
| 4590 |
|
---|
| 4591 | static void
|
---|
| 4592 | concat_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OnigEncoding enc)
|
---|
| 4593 | {
|
---|
| 4594 | int i, j, len;
|
---|
| 4595 | UChar *p, *end;
|
---|
| 4596 | OptAncInfo tanc;
|
---|
| 4597 |
|
---|
| 4598 | if (to->ignore_case < 0)
|
---|
| 4599 | to->ignore_case = add->ignore_case;
|
---|
| 4600 | else if (to->ignore_case != add->ignore_case)
|
---|
| 4601 | return ; /* avoid */
|
---|
| 4602 |
|
---|
| 4603 | p = add->s;
|
---|
| 4604 | end = p + add->len;
|
---|
| 4605 | for (i = to->len; p < end; ) {
|
---|
| 4606 | len = enclen(enc, p);
|
---|
| 4607 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
| 4608 | for (j = 0; j < len && p < end; j++)
|
---|
| 4609 | to->s[i++] = *p++;
|
---|
| 4610 | }
|
---|
| 4611 |
|
---|
| 4612 | to->len = i;
|
---|
| 4613 | to->reach_end = (p == end ? add->reach_end : 0);
|
---|
| 4614 |
|
---|
| 4615 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, 1, 1);
|
---|
| 4616 | if (! to->reach_end) tanc.right_anchor = 0;
|
---|
| 4617 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
| 4618 | }
|
---|
| 4619 |
|
---|
| 4620 | static void
|
---|
| 4621 | concat_opt_exact_info_str(OptExactInfo* to, UChar* s, UChar* end,
|
---|
| 4622 | int raw ARG_UNUSED, OnigEncoding enc)
|
---|
| 4623 | {
|
---|
| 4624 | int i, j, len;
|
---|
| 4625 | UChar *p;
|
---|
| 4626 |
|
---|
| 4627 | for (i = to->len, p = s; p < end && i < OPT_EXACT_MAXLEN; ) {
|
---|
| 4628 | len = enclen(enc, p);
|
---|
| 4629 | if (i + len > OPT_EXACT_MAXLEN) break;
|
---|
| 4630 | for (j = 0; j < len && p < end; j++)
|
---|
| 4631 | to->s[i++] = *p++;
|
---|
| 4632 | }
|
---|
| 4633 |
|
---|
| 4634 | to->len = i;
|
---|
| 4635 | }
|
---|
| 4636 |
|
---|
| 4637 | static void
|
---|
| 4638 | alt_merge_opt_exact_info(OptExactInfo* to, OptExactInfo* add, OptEnv* env)
|
---|
| 4639 | {
|
---|
| 4640 | int i, j, len;
|
---|
| 4641 |
|
---|
| 4642 | if (add->len == 0 || to->len == 0) {
|
---|
| 4643 | clear_opt_exact_info(to);
|
---|
| 4644 | return ;
|
---|
| 4645 | }
|
---|
| 4646 |
|
---|
| 4647 | if (! is_equal_mml(&to->mmd, &add->mmd)) {
|
---|
| 4648 | clear_opt_exact_info(to);
|
---|
| 4649 | return ;
|
---|
| 4650 | }
|
---|
| 4651 |
|
---|
| 4652 | for (i = 0; i < to->len && i < add->len; ) {
|
---|
| 4653 | if (to->s[i] != add->s[i]) break;
|
---|
| 4654 | len = enclen(env->enc, to->s + i);
|
---|
| 4655 |
|
---|
| 4656 | for (j = 1; j < len; j++) {
|
---|
| 4657 | if (to->s[i+j] != add->s[i+j]) break;
|
---|
| 4658 | }
|
---|
| 4659 | if (j < len) break;
|
---|
| 4660 | i += len;
|
---|
| 4661 | }
|
---|
| 4662 |
|
---|
| 4663 | if (! add->reach_end || i < add->len || i < to->len) {
|
---|
| 4664 | to->reach_end = 0;
|
---|
| 4665 | }
|
---|
| 4666 | to->len = i;
|
---|
| 4667 | if (to->ignore_case < 0)
|
---|
| 4668 | to->ignore_case = add->ignore_case;
|
---|
| 4669 | else if (add->ignore_case >= 0)
|
---|
| 4670 | to->ignore_case |= add->ignore_case;
|
---|
| 4671 |
|
---|
| 4672 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
| 4673 | if (! to->reach_end) to->anc.right_anchor = 0;
|
---|
| 4674 | }
|
---|
| 4675 |
|
---|
| 4676 | static void
|
---|
| 4677 | select_opt_exact_info(OnigEncoding enc, OptExactInfo* now, OptExactInfo* alt)
|
---|
| 4678 | {
|
---|
| 4679 | int v1, v2;
|
---|
| 4680 |
|
---|
| 4681 | v1 = now->len;
|
---|
| 4682 | v2 = alt->len;
|
---|
| 4683 |
|
---|
| 4684 | if (v2 == 0) {
|
---|
| 4685 | return ;
|
---|
| 4686 | }
|
---|
| 4687 | else if (v1 == 0) {
|
---|
| 4688 | copy_opt_exact_info(now, alt);
|
---|
| 4689 | return ;
|
---|
| 4690 | }
|
---|
| 4691 | else if (v1 <= 2 && v2 <= 2) {
|
---|
| 4692 | /* ByteValTable[x] is big value --> low price */
|
---|
| 4693 | v2 = map_position_value(enc, now->s[0]);
|
---|
| 4694 | v1 = map_position_value(enc, alt->s[0]);
|
---|
| 4695 |
|
---|
| 4696 | if (now->len > 1) v1 += 5;
|
---|
| 4697 | if (alt->len > 1) v2 += 5;
|
---|
| 4698 | }
|
---|
| 4699 |
|
---|
| 4700 | if (now->ignore_case <= 0) v1 *= 2;
|
---|
| 4701 | if (alt->ignore_case <= 0) v2 *= 2;
|
---|
| 4702 |
|
---|
| 4703 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
| 4704 | copy_opt_exact_info(now, alt);
|
---|
| 4705 | }
|
---|
| 4706 |
|
---|
| 4707 | static void
|
---|
| 4708 | clear_opt_map_info(OptMapInfo* map)
|
---|
| 4709 | {
|
---|
| 4710 | static const OptMapInfo clean_info = {
|
---|
| 4711 | {0, 0}, {0, 0}, 0,
|
---|
| 4712 | {
|
---|
| 4713 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4714 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4715 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4716 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4717 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4718 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4719 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4720 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4721 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4722 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4723 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4724 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4725 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4726 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4727 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
---|
| 4728 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
---|
| 4729 | }
|
---|
| 4730 | };
|
---|
| 4731 |
|
---|
| 4732 | xmemcpy(map, &clean_info, sizeof(OptMapInfo));
|
---|
| 4733 | }
|
---|
| 4734 |
|
---|
| 4735 | static void
|
---|
| 4736 | copy_opt_map_info(OptMapInfo* to, OptMapInfo* from)
|
---|
| 4737 | {
|
---|
| 4738 | *to = *from;
|
---|
| 4739 | }
|
---|
| 4740 |
|
---|
| 4741 | static void
|
---|
| 4742 | add_char_opt_map_info(OptMapInfo* map, UChar c, OnigEncoding enc)
|
---|
| 4743 | {
|
---|
| 4744 | if (map->map[c] == 0) {
|
---|
| 4745 | map->map[c] = 1;
|
---|
| 4746 | map->value += map_position_value(enc, c);
|
---|
| 4747 | }
|
---|
| 4748 | }
|
---|
| 4749 |
|
---|
| 4750 | static int
|
---|
| 4751 | add_char_amb_opt_map_info(OptMapInfo* map, UChar* p, UChar* end,
|
---|
| 4752 | OnigEncoding enc, OnigCaseFoldType case_fold_flag)
|
---|
| 4753 | {
|
---|
| 4754 | OnigCaseFoldCodeItem items[ONIGENC_GET_CASE_FOLD_CODES_MAX_NUM];
|
---|
| 4755 | UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN];
|
---|
| 4756 | int i, n;
|
---|
| 4757 |
|
---|
| 4758 | add_char_opt_map_info(map, p[0], enc);
|
---|
| 4759 |
|
---|
| 4760 | case_fold_flag = DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag);
|
---|
| 4761 | n = ONIGENC_GET_CASE_FOLD_CODES_BY_STR(enc, case_fold_flag, p, end, items);
|
---|
| 4762 | if (n < 0) return n;
|
---|
| 4763 |
|
---|
| 4764 | for (i = 0; i < n; i++) {
|
---|
| 4765 | ONIGENC_CODE_TO_MBC(enc, items[i].code[0], buf);
|
---|
| 4766 | add_char_opt_map_info(map, buf[0], enc);
|
---|
| 4767 | }
|
---|
| 4768 |
|
---|
| 4769 | return 0;
|
---|
| 4770 | }
|
---|
| 4771 |
|
---|
| 4772 | static void
|
---|
| 4773 | select_opt_map_info(OptMapInfo* now, OptMapInfo* alt)
|
---|
| 4774 | {
|
---|
| 4775 | const int z = 1<<15; /* 32768: something big value */
|
---|
| 4776 |
|
---|
| 4777 | int v1, v2;
|
---|
| 4778 |
|
---|
| 4779 | if (alt->value == 0) return ;
|
---|
| 4780 | if (now->value == 0) {
|
---|
| 4781 | copy_opt_map_info(now, alt);
|
---|
| 4782 | return ;
|
---|
| 4783 | }
|
---|
| 4784 |
|
---|
| 4785 | v1 = z / now->value;
|
---|
| 4786 | v2 = z / alt->value;
|
---|
| 4787 | if (comp_distance_value(&now->mmd, &alt->mmd, v1, v2) > 0)
|
---|
| 4788 | copy_opt_map_info(now, alt);
|
---|
| 4789 | }
|
---|
| 4790 |
|
---|
| 4791 | static int
|
---|
| 4792 | comp_opt_exact_or_map_info(OptExactInfo* e, OptMapInfo* m)
|
---|
| 4793 | {
|
---|
| 4794 | #define COMP_EM_BASE 20
|
---|
| 4795 | int ve, vm;
|
---|
| 4796 |
|
---|
| 4797 | if (m->value <= 0) return -1;
|
---|
| 4798 |
|
---|
| 4799 | ve = COMP_EM_BASE * e->len * (e->ignore_case > 0 ? 1 : 2);
|
---|
| 4800 | vm = COMP_EM_BASE * 5 * 2 / m->value;
|
---|
| 4801 | return comp_distance_value(&e->mmd, &m->mmd, ve, vm);
|
---|
| 4802 | }
|
---|
| 4803 |
|
---|
| 4804 | static void
|
---|
| 4805 | alt_merge_opt_map_info(OnigEncoding enc, OptMapInfo* to, OptMapInfo* add)
|
---|
| 4806 | {
|
---|
| 4807 | int i, val;
|
---|
| 4808 |
|
---|
| 4809 | /* if (! is_equal_mml(&to->mmd, &add->mmd)) return ; */
|
---|
| 4810 | if (to->value == 0) return ;
|
---|
| 4811 | if (add->value == 0 || to->mmd.max < add->mmd.min) {
|
---|
| 4812 | clear_opt_map_info(to);
|
---|
| 4813 | return ;
|
---|
| 4814 | }
|
---|
| 4815 |
|
---|
| 4816 | alt_merge_mml(&to->mmd, &add->mmd);
|
---|
| 4817 |
|
---|
| 4818 | val = 0;
|
---|
| 4819 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
| 4820 | if (add->map[i])
|
---|
| 4821 | to->map[i] = 1;
|
---|
| 4822 |
|
---|
| 4823 | if (to->map[i])
|
---|
| 4824 | val += map_position_value(enc, i);
|
---|
| 4825 | }
|
---|
| 4826 | to->value = val;
|
---|
| 4827 |
|
---|
| 4828 | alt_merge_opt_anc_info(&to->anc, &add->anc);
|
---|
| 4829 | }
|
---|
| 4830 |
|
---|
| 4831 | static void
|
---|
| 4832 | set_bound_node_opt_info(NodeOptInfo* opt, MinMaxLen* mmd)
|
---|
| 4833 | {
|
---|
| 4834 | copy_mml(&(opt->exb.mmd), mmd);
|
---|
| 4835 | copy_mml(&(opt->expr.mmd), mmd);
|
---|
| 4836 | copy_mml(&(opt->map.mmd), mmd);
|
---|
| 4837 | }
|
---|
| 4838 |
|
---|
| 4839 | static void
|
---|
| 4840 | clear_node_opt_info(NodeOptInfo* opt)
|
---|
| 4841 | {
|
---|
| 4842 | clear_mml(&opt->len);
|
---|
| 4843 | clear_opt_anc_info(&opt->anc);
|
---|
| 4844 | clear_opt_exact_info(&opt->exb);
|
---|
| 4845 | clear_opt_exact_info(&opt->exm);
|
---|
| 4846 | clear_opt_exact_info(&opt->expr);
|
---|
| 4847 | clear_opt_map_info(&opt->map);
|
---|
| 4848 | }
|
---|
| 4849 |
|
---|
| 4850 | static void
|
---|
| 4851 | copy_node_opt_info(NodeOptInfo* to, NodeOptInfo* from)
|
---|
| 4852 | {
|
---|
| 4853 | *to = *from;
|
---|
| 4854 | }
|
---|
| 4855 |
|
---|
| 4856 | static void
|
---|
| 4857 | concat_left_node_opt_info(OnigEncoding enc, NodeOptInfo* to, NodeOptInfo* add)
|
---|
| 4858 | {
|
---|
| 4859 | int exb_reach, exm_reach;
|
---|
| 4860 | OptAncInfo tanc;
|
---|
| 4861 |
|
---|
| 4862 | concat_opt_anc_info(&tanc, &to->anc, &add->anc, to->len.max, add->len.max);
|
---|
| 4863 | copy_opt_anc_info(&to->anc, &tanc);
|
---|
| 4864 |
|
---|
| 4865 | if (add->exb.len > 0 && to->len.max == 0) {
|
---|
| 4866 | concat_opt_anc_info(&tanc, &to->anc, &add->exb.anc,
|
---|
| 4867 | to->len.max, add->len.max);
|
---|
| 4868 | copy_opt_anc_info(&add->exb.anc, &tanc);
|
---|
| 4869 | }
|
---|
| 4870 |
|
---|
| 4871 | if (add->map.value > 0 && to->len.max == 0) {
|
---|
| 4872 | if (add->map.mmd.max == 0)
|
---|
| 4873 | add->map.anc.left_anchor |= to->anc.left_anchor;
|
---|
| 4874 | }
|
---|
| 4875 |
|
---|
| 4876 | exb_reach = to->exb.reach_end;
|
---|
| 4877 | exm_reach = to->exm.reach_end;
|
---|
| 4878 |
|
---|
| 4879 | if (add->len.max != 0)
|
---|
| 4880 | to->exb.reach_end = to->exm.reach_end = 0;
|
---|
| 4881 |
|
---|
| 4882 | if (add->exb.len > 0) {
|
---|
| 4883 | if (exb_reach) {
|
---|
| 4884 | concat_opt_exact_info(&to->exb, &add->exb, enc);
|
---|
| 4885 | clear_opt_exact_info(&add->exb);
|
---|
| 4886 | }
|
---|
| 4887 | else if (exm_reach) {
|
---|
| 4888 | concat_opt_exact_info(&to->exm, &add->exb, enc);
|
---|
| 4889 | clear_opt_exact_info(&add->exb);
|
---|
| 4890 | }
|
---|
| 4891 | }
|
---|
| 4892 | select_opt_exact_info(enc, &to->exm, &add->exb);
|
---|
| 4893 | select_opt_exact_info(enc, &to->exm, &add->exm);
|
---|
| 4894 |
|
---|
| 4895 | if (to->expr.len > 0) {
|
---|
| 4896 | if (add->len.max > 0) {
|
---|
| 4897 | if (to->expr.len > (int )add->len.max)
|
---|
| 4898 | to->expr.len = (int )add->len.max;
|
---|
| 4899 |
|
---|
| 4900 | if (to->expr.mmd.max == 0)
|
---|
| 4901 | select_opt_exact_info(enc, &to->exb, &to->expr);
|
---|
| 4902 | else
|
---|
| 4903 | select_opt_exact_info(enc, &to->exm, &to->expr);
|
---|
| 4904 | }
|
---|
| 4905 | }
|
---|
| 4906 | else if (add->expr.len > 0) {
|
---|
| 4907 | copy_opt_exact_info(&to->expr, &add->expr);
|
---|
| 4908 | }
|
---|
| 4909 |
|
---|
| 4910 | select_opt_map_info(&to->map, &add->map);
|
---|
| 4911 |
|
---|
| 4912 | add_mml(&to->len, &add->len);
|
---|
| 4913 | }
|
---|
| 4914 |
|
---|
| 4915 | static void
|
---|
| 4916 | alt_merge_node_opt_info(NodeOptInfo* to, NodeOptInfo* add, OptEnv* env)
|
---|
| 4917 | {
|
---|
| 4918 | alt_merge_opt_anc_info (&to->anc, &add->anc);
|
---|
| 4919 | alt_merge_opt_exact_info(&to->exb, &add->exb, env);
|
---|
| 4920 | alt_merge_opt_exact_info(&to->exm, &add->exm, env);
|
---|
| 4921 | alt_merge_opt_exact_info(&to->expr, &add->expr, env);
|
---|
| 4922 | alt_merge_opt_map_info(env->enc, &to->map, &add->map);
|
---|
| 4923 |
|
---|
| 4924 | alt_merge_mml(&to->len, &add->len);
|
---|
| 4925 | }
|
---|
| 4926 |
|
---|
| 4927 |
|
---|
| 4928 | #define MAX_NODE_OPT_INFO_REF_COUNT 5
|
---|
| 4929 |
|
---|
| 4930 | static int
|
---|
| 4931 | optimize_node_left(Node* node, NodeOptInfo* opt, OptEnv* env)
|
---|
| 4932 | {
|
---|
| 4933 | int type;
|
---|
| 4934 | int r = 0;
|
---|
| 4935 |
|
---|
| 4936 | clear_node_opt_info(opt);
|
---|
| 4937 | set_bound_node_opt_info(opt, &env->mmd);
|
---|
| 4938 |
|
---|
| 4939 | type = NTYPE(node);
|
---|
| 4940 | switch (type) {
|
---|
| 4941 | case NT_LIST:
|
---|
| 4942 | {
|
---|
| 4943 | OptEnv nenv;
|
---|
| 4944 | NodeOptInfo nopt;
|
---|
| 4945 | Node* nd = node;
|
---|
| 4946 |
|
---|
| 4947 | copy_opt_env(&nenv, env);
|
---|
| 4948 | do {
|
---|
| 4949 | r = optimize_node_left(NCAR(nd), &nopt, &nenv);
|
---|
| 4950 | if (r == 0) {
|
---|
| 4951 | add_mml(&nenv.mmd, &nopt.len);
|
---|
| 4952 | concat_left_node_opt_info(env->enc, opt, &nopt);
|
---|
| 4953 | }
|
---|
| 4954 | } while (r == 0 && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
| 4955 | }
|
---|
| 4956 | break;
|
---|
| 4957 |
|
---|
| 4958 | case NT_ALT:
|
---|
| 4959 | {
|
---|
| 4960 | NodeOptInfo nopt;
|
---|
| 4961 | Node* nd = node;
|
---|
| 4962 |
|
---|
| 4963 | do {
|
---|
| 4964 | r = optimize_node_left(NCAR(nd), &nopt, env);
|
---|
| 4965 | if (r == 0) {
|
---|
| 4966 | if (nd == node) copy_node_opt_info(opt, &nopt);
|
---|
| 4967 | else alt_merge_node_opt_info(opt, &nopt, env);
|
---|
| 4968 | }
|
---|
| 4969 | } while ((r == 0) && IS_NOT_NULL(nd = NCDR(nd)));
|
---|
| 4970 | }
|
---|
| 4971 | break;
|
---|
| 4972 |
|
---|
| 4973 | case NT_STR:
|
---|
| 4974 | {
|
---|
| 4975 | StrNode* sn = NSTR(node);
|
---|
| 4976 | OnigDistance slen = sn->end - sn->s;
|
---|
| 4977 | int is_raw = NSTRING_IS_RAW(node);
|
---|
| 4978 |
|
---|
| 4979 | if (! NSTRING_IS_AMBIG(node)) {
|
---|
| 4980 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
| 4981 | is_raw, env->enc);
|
---|
| 4982 | opt->exb.ignore_case = 0;
|
---|
| 4983 | if (slen > 0) {
|
---|
| 4984 | add_char_opt_map_info(&opt->map, *(sn->s), env->enc);
|
---|
| 4985 | }
|
---|
| 4986 | set_mml(&opt->len, slen, slen);
|
---|
| 4987 | }
|
---|
| 4988 | else {
|
---|
| 4989 | OnigDistance max;
|
---|
| 4990 |
|
---|
| 4991 | if (NSTRING_IS_DONT_GET_OPT_INFO(node)) {
|
---|
| 4992 | int n = onigenc_strlen(env->enc, sn->s, sn->end);
|
---|
| 4993 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc) * n;
|
---|
| 4994 | }
|
---|
| 4995 | else {
|
---|
| 4996 | concat_opt_exact_info_str(&opt->exb, sn->s, sn->end,
|
---|
| 4997 | is_raw, env->enc);
|
---|
| 4998 | opt->exb.ignore_case = 1;
|
---|
| 4999 |
|
---|
| 5000 | if (slen > 0) {
|
---|
| 5001 | r = add_char_amb_opt_map_info(&opt->map, sn->s, sn->end,
|
---|
| 5002 | env->enc, env->case_fold_flag);
|
---|
| 5003 | if (r != 0) break;
|
---|
| 5004 | }
|
---|
| 5005 |
|
---|
| 5006 | max = slen;
|
---|
| 5007 | }
|
---|
| 5008 |
|
---|
| 5009 | set_mml(&opt->len, slen, max);
|
---|
| 5010 | }
|
---|
| 5011 |
|
---|
| 5012 | if ((OnigDistance )opt->exb.len == slen)
|
---|
| 5013 | opt->exb.reach_end = 1;
|
---|
| 5014 | }
|
---|
| 5015 | break;
|
---|
| 5016 |
|
---|
| 5017 | case NT_CCLASS:
|
---|
| 5018 | {
|
---|
| 5019 | int i, z;
|
---|
| 5020 | CClassNode* cc = NCCLASS(node);
|
---|
| 5021 |
|
---|
| 5022 | /* no need to check ignore case. (set in setup_tree()) */
|
---|
| 5023 |
|
---|
| 5024 | if (IS_NOT_NULL(cc->mbuf) || IS_NCCLASS_NOT(cc)) {
|
---|
| 5025 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
| 5026 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 5027 |
|
---|
| 5028 | set_mml(&opt->len, min, max);
|
---|
| 5029 | }
|
---|
| 5030 | else {
|
---|
| 5031 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 5032 | z = BITSET_AT(cc->bs, i);
|
---|
| 5033 | if ((z && !IS_NCCLASS_NOT(cc)) || (!z && IS_NCCLASS_NOT(cc))) {
|
---|
| 5034 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
| 5035 | }
|
---|
| 5036 | }
|
---|
| 5037 | set_mml(&opt->len, 1, 1);
|
---|
| 5038 | }
|
---|
| 5039 | }
|
---|
| 5040 | break;
|
---|
| 5041 |
|
---|
| 5042 | case NT_CTYPE:
|
---|
| 5043 | {
|
---|
| 5044 | int i, min, max;
|
---|
| 5045 | int maxcode;
|
---|
| 5046 |
|
---|
| 5047 | max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 5048 |
|
---|
| 5049 | if (max == 1) {
|
---|
| 5050 | min = 1;
|
---|
| 5051 |
|
---|
| 5052 | maxcode = NCTYPE(node)->ascii_range ? 0x80 : SINGLE_BYTE_SIZE;
|
---|
| 5053 | switch (NCTYPE(node)->ctype) {
|
---|
| 5054 | case ONIGENC_CTYPE_WORD:
|
---|
| 5055 | if (NCTYPE(node)->not != 0) {
|
---|
| 5056 | for (i = 0; i < SINGLE_BYTE_SIZE; i++) {
|
---|
| 5057 | if (! ONIGENC_IS_CODE_WORD(env->enc, i) || i >= maxcode) {
|
---|
| 5058 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
| 5059 | }
|
---|
| 5060 | }
|
---|
| 5061 | }
|
---|
| 5062 | else {
|
---|
| 5063 | for (i = 0; i < maxcode; i++) {
|
---|
| 5064 | if (ONIGENC_IS_CODE_WORD(env->enc, i)) {
|
---|
| 5065 | add_char_opt_map_info(&opt->map, (UChar )i, env->enc);
|
---|
| 5066 | }
|
---|
| 5067 | }
|
---|
| 5068 | }
|
---|
| 5069 | break;
|
---|
| 5070 | }
|
---|
| 5071 | }
|
---|
| 5072 | else {
|
---|
| 5073 | min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
| 5074 | }
|
---|
| 5075 | set_mml(&opt->len, min, max);
|
---|
| 5076 | }
|
---|
| 5077 | break;
|
---|
| 5078 |
|
---|
| 5079 | case NT_CANY:
|
---|
| 5080 | {
|
---|
| 5081 | OnigDistance min = ONIGENC_MBC_MINLEN(env->enc);
|
---|
| 5082 | OnigDistance max = ONIGENC_MBC_MAXLEN_DIST(env->enc);
|
---|
| 5083 | set_mml(&opt->len, min, max);
|
---|
| 5084 | }
|
---|
| 5085 | break;
|
---|
| 5086 |
|
---|
| 5087 | case NT_ANCHOR:
|
---|
| 5088 | switch (NANCHOR(node)->type) {
|
---|
| 5089 | case ANCHOR_BEGIN_BUF:
|
---|
| 5090 | case ANCHOR_BEGIN_POSITION:
|
---|
| 5091 | case ANCHOR_BEGIN_LINE:
|
---|
| 5092 | case ANCHOR_END_BUF:
|
---|
| 5093 | case ANCHOR_SEMI_END_BUF:
|
---|
| 5094 | case ANCHOR_END_LINE:
|
---|
| 5095 | case ANCHOR_LOOK_BEHIND: /* just for (?<=x).* */
|
---|
| 5096 | case ANCHOR_PREC_READ_NOT: /* just for (?!x).* */
|
---|
| 5097 | add_opt_anc_info(&opt->anc, NANCHOR(node)->type);
|
---|
| 5098 | break;
|
---|
| 5099 |
|
---|
| 5100 | case ANCHOR_PREC_READ:
|
---|
| 5101 | {
|
---|
| 5102 | NodeOptInfo nopt;
|
---|
| 5103 |
|
---|
| 5104 | r = optimize_node_left(NANCHOR(node)->target, &nopt, env);
|
---|
| 5105 | if (r == 0) {
|
---|
| 5106 | if (nopt.exb.len > 0)
|
---|
| 5107 | copy_opt_exact_info(&opt->expr, &nopt.exb);
|
---|
| 5108 | else if (nopt.exm.len > 0)
|
---|
| 5109 | copy_opt_exact_info(&opt->expr, &nopt.exm);
|
---|
| 5110 |
|
---|
| 5111 | opt->expr.reach_end = 0;
|
---|
| 5112 |
|
---|
| 5113 | if (nopt.map.value > 0)
|
---|
| 5114 | copy_opt_map_info(&opt->map, &nopt.map);
|
---|
| 5115 | }
|
---|
| 5116 | }
|
---|
| 5117 | break;
|
---|
| 5118 |
|
---|
| 5119 | case ANCHOR_LOOK_BEHIND_NOT:
|
---|
| 5120 | break;
|
---|
| 5121 | }
|
---|
| 5122 | break;
|
---|
| 5123 |
|
---|
| 5124 | case NT_BREF:
|
---|
| 5125 | {
|
---|
| 5126 | int i;
|
---|
| 5127 | int* backs;
|
---|
| 5128 | OnigDistance min, max, tmin, tmax;
|
---|
| 5129 | Node** nodes = SCANENV_MEM_NODES(env->scan_env);
|
---|
| 5130 | BRefNode* br = NBREF(node);
|
---|
| 5131 |
|
---|
| 5132 | if (br->state & NST_RECURSION) {
|
---|
| 5133 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
| 5134 | break;
|
---|
| 5135 | }
|
---|
| 5136 | backs = BACKREFS_P(br);
|
---|
| 5137 | r = get_min_match_length(nodes[backs[0]], &min, env->scan_env);
|
---|
| 5138 | if (r != 0) break;
|
---|
| 5139 | r = get_max_match_length(nodes[backs[0]], &max, env->scan_env);
|
---|
| 5140 | if (r != 0) break;
|
---|
| 5141 | for (i = 1; i < br->back_num; i++) {
|
---|
| 5142 | r = get_min_match_length(nodes[backs[i]], &tmin, env->scan_env);
|
---|
| 5143 | if (r != 0) break;
|
---|
| 5144 | r = get_max_match_length(nodes[backs[i]], &tmax, env->scan_env);
|
---|
| 5145 | if (r != 0) break;
|
---|
| 5146 | if (min > tmin) min = tmin;
|
---|
| 5147 | if (max < tmax) max = tmax;
|
---|
| 5148 | }
|
---|
| 5149 | if (r == 0) set_mml(&opt->len, min, max);
|
---|
| 5150 | }
|
---|
| 5151 | break;
|
---|
| 5152 |
|
---|
| 5153 | #ifdef USE_SUBEXP_CALL
|
---|
| 5154 | case NT_CALL:
|
---|
| 5155 | if (IS_CALL_RECURSION(NCALL(node)))
|
---|
| 5156 | set_mml(&opt->len, 0, ONIG_INFINITE_DISTANCE);
|
---|
| 5157 | else {
|
---|
| 5158 | OnigOptionType save = env->options;
|
---|
| 5159 | env->options = NENCLOSE(NCALL(node)->target)->option;
|
---|
| 5160 | r = optimize_node_left(NCALL(node)->target, opt, env);
|
---|
| 5161 | env->options = save;
|
---|
| 5162 | }
|
---|
| 5163 | break;
|
---|
| 5164 | #endif
|
---|
| 5165 |
|
---|
| 5166 | case NT_QTFR:
|
---|
| 5167 | {
|
---|
| 5168 | int i;
|
---|
| 5169 | OnigDistance min, max;
|
---|
| 5170 | NodeOptInfo nopt;
|
---|
| 5171 | QtfrNode* qn = NQTFR(node);
|
---|
| 5172 |
|
---|
| 5173 | r = optimize_node_left(qn->target, &nopt, env);
|
---|
| 5174 | if (r) break;
|
---|
| 5175 |
|
---|
| 5176 | if (/*qn->lower == 0 &&*/ IS_REPEAT_INFINITE(qn->upper)) {
|
---|
| 5177 | if (env->mmd.max == 0 &&
|
---|
| 5178 | NTYPE(qn->target) == NT_CANY && qn->greedy) {
|
---|
| 5179 | if (IS_MULTILINE(env->options))
|
---|
| 5180 | /* implicit anchor: /.*a/ ==> /\A.*a/ */
|
---|
| 5181 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_ML);
|
---|
| 5182 | else
|
---|
| 5183 | add_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR);
|
---|
| 5184 | }
|
---|
| 5185 | }
|
---|
| 5186 | else {
|
---|
| 5187 | if (qn->lower > 0) {
|
---|
| 5188 | copy_node_opt_info(opt, &nopt);
|
---|
| 5189 | if (nopt.exb.len > 0) {
|
---|
| 5190 | if (nopt.exb.reach_end) {
|
---|
| 5191 | for (i = 2; i <= qn->lower &&
|
---|
| 5192 | ! is_full_opt_exact_info(&opt->exb); i++) {
|
---|
| 5193 | concat_opt_exact_info(&opt->exb, &nopt.exb, env->enc);
|
---|
| 5194 | }
|
---|
| 5195 | if (i < qn->lower) {
|
---|
| 5196 | opt->exb.reach_end = 0;
|
---|
| 5197 | }
|
---|
| 5198 | }
|
---|
| 5199 | }
|
---|
| 5200 |
|
---|
| 5201 | if (qn->lower != qn->upper) {
|
---|
| 5202 | opt->exb.reach_end = 0;
|
---|
| 5203 | opt->exm.reach_end = 0;
|
---|
| 5204 | }
|
---|
| 5205 | if (qn->lower > 1)
|
---|
| 5206 | opt->exm.reach_end = 0;
|
---|
| 5207 | }
|
---|
| 5208 | }
|
---|
| 5209 |
|
---|
| 5210 | min = distance_multiply(nopt.len.min, qn->lower);
|
---|
| 5211 | if (IS_REPEAT_INFINITE(qn->upper))
|
---|
| 5212 | max = (nopt.len.max > 0 ? ONIG_INFINITE_DISTANCE : 0);
|
---|
| 5213 | else
|
---|
| 5214 | max = distance_multiply(nopt.len.max, qn->upper);
|
---|
| 5215 |
|
---|
| 5216 | set_mml(&opt->len, min, max);
|
---|
| 5217 | }
|
---|
| 5218 | break;
|
---|
| 5219 |
|
---|
| 5220 | case NT_ENCLOSE:
|
---|
| 5221 | {
|
---|
| 5222 | EncloseNode* en = NENCLOSE(node);
|
---|
| 5223 |
|
---|
| 5224 | switch (en->type) {
|
---|
| 5225 | case ENCLOSE_OPTION:
|
---|
| 5226 | {
|
---|
| 5227 | OnigOptionType save = env->options;
|
---|
| 5228 |
|
---|
| 5229 | env->options = en->option;
|
---|
| 5230 | r = optimize_node_left(en->target, opt, env);
|
---|
| 5231 | env->options = save;
|
---|
| 5232 | }
|
---|
| 5233 | break;
|
---|
| 5234 |
|
---|
| 5235 | case ENCLOSE_MEMORY:
|
---|
| 5236 | #ifdef USE_SUBEXP_CALL
|
---|
| 5237 | en->opt_count++;
|
---|
| 5238 | if (en->opt_count > MAX_NODE_OPT_INFO_REF_COUNT) {
|
---|
| 5239 | OnigDistance min, max;
|
---|
| 5240 |
|
---|
| 5241 | min = 0;
|
---|
| 5242 | max = ONIG_INFINITE_DISTANCE;
|
---|
| 5243 | if (IS_ENCLOSE_MIN_FIXED(en)) min = en->min_len;
|
---|
| 5244 | if (IS_ENCLOSE_MAX_FIXED(en)) max = en->max_len;
|
---|
| 5245 | set_mml(&opt->len, min, max);
|
---|
| 5246 | }
|
---|
| 5247 | else
|
---|
| 5248 | #endif
|
---|
| 5249 | {
|
---|
| 5250 | r = optimize_node_left(en->target, opt, env);
|
---|
| 5251 |
|
---|
| 5252 | if (is_set_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK)) {
|
---|
| 5253 | if (BIT_STATUS_AT(env->scan_env->backrefed_mem, en->regnum))
|
---|
| 5254 | remove_opt_anc_info(&opt->anc, ANCHOR_ANYCHAR_STAR_MASK);
|
---|
| 5255 | }
|
---|
| 5256 | }
|
---|
| 5257 | break;
|
---|
| 5258 |
|
---|
| 5259 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 5260 | case ENCLOSE_CONDITION:
|
---|
| 5261 | r = optimize_node_left(en->target, opt, env);
|
---|
| 5262 | break;
|
---|
| 5263 | }
|
---|
| 5264 | }
|
---|
| 5265 | break;
|
---|
| 5266 |
|
---|
| 5267 | default:
|
---|
| 5268 | #ifdef ONIG_DEBUG
|
---|
| 5269 | fprintf(stderr, "optimize_node_left: undefined node type %d\n",
|
---|
| 5270 | NTYPE(node));
|
---|
| 5271 | #endif
|
---|
| 5272 | r = ONIGERR_TYPE_BUG;
|
---|
| 5273 | break;
|
---|
| 5274 | }
|
---|
| 5275 |
|
---|
| 5276 | return r;
|
---|
| 5277 | }
|
---|
| 5278 |
|
---|
| 5279 | static int
|
---|
| 5280 | set_optimize_exact_info(regex_t* reg, OptExactInfo* e)
|
---|
| 5281 | {
|
---|
| 5282 | int r;
|
---|
| 5283 | int allow_reverse;
|
---|
| 5284 |
|
---|
| 5285 | if (e->len == 0) return 0;
|
---|
| 5286 |
|
---|
| 5287 | reg->exact = (UChar* )xmalloc(e->len);
|
---|
| 5288 | CHECK_NULL_RETURN_MEMERR(reg->exact);
|
---|
| 5289 | xmemcpy(reg->exact, e->s, e->len);
|
---|
| 5290 | reg->exact_end = reg->exact + e->len;
|
---|
| 5291 |
|
---|
| 5292 | allow_reverse =
|
---|
| 5293 | ONIGENC_IS_ALLOWED_REVERSE_MATCH(reg->enc, reg->exact, reg->exact_end);
|
---|
| 5294 |
|
---|
| 5295 | if (e->ignore_case > 0) {
|
---|
| 5296 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
| 5297 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
| 5298 | reg->map, &(reg->int_map), 1);
|
---|
| 5299 | if (r == 0) {
|
---|
| 5300 | reg->optimize = (allow_reverse != 0
|
---|
| 5301 | ? ONIG_OPTIMIZE_EXACT_BM_IC : ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC);
|
---|
| 5302 | }
|
---|
| 5303 | else {
|
---|
| 5304 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
| 5305 | }
|
---|
| 5306 | }
|
---|
| 5307 | else {
|
---|
| 5308 | reg->optimize = ONIG_OPTIMIZE_EXACT_IC;
|
---|
| 5309 | }
|
---|
| 5310 | }
|
---|
| 5311 | else {
|
---|
| 5312 | if (e->len >= 3 || (e->len >= 2 && allow_reverse)) {
|
---|
| 5313 | r = set_bm_skip(reg->exact, reg->exact_end, reg,
|
---|
| 5314 | reg->map, &(reg->int_map), 0);
|
---|
| 5315 | if (r) return r;
|
---|
| 5316 |
|
---|
| 5317 | reg->optimize = (allow_reverse != 0
|
---|
| 5318 | ? ONIG_OPTIMIZE_EXACT_BM : ONIG_OPTIMIZE_EXACT_BM_NOT_REV);
|
---|
| 5319 | }
|
---|
| 5320 | else {
|
---|
| 5321 | reg->optimize = ONIG_OPTIMIZE_EXACT;
|
---|
| 5322 | }
|
---|
| 5323 | }
|
---|
| 5324 |
|
---|
| 5325 | reg->dmin = e->mmd.min;
|
---|
| 5326 | reg->dmax = e->mmd.max;
|
---|
| 5327 |
|
---|
| 5328 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
| 5329 | reg->threshold_len = (int )(reg->dmin + (reg->exact_end - reg->exact));
|
---|
| 5330 | }
|
---|
| 5331 |
|
---|
| 5332 | return 0;
|
---|
| 5333 | }
|
---|
| 5334 |
|
---|
| 5335 | static void
|
---|
| 5336 | set_optimize_map_info(regex_t* reg, OptMapInfo* m)
|
---|
| 5337 | {
|
---|
| 5338 | int i;
|
---|
| 5339 |
|
---|
| 5340 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
| 5341 | reg->map[i] = m->map[i];
|
---|
| 5342 |
|
---|
| 5343 | reg->optimize = ONIG_OPTIMIZE_MAP;
|
---|
| 5344 | reg->dmin = m->mmd.min;
|
---|
| 5345 | reg->dmax = m->mmd.max;
|
---|
| 5346 |
|
---|
| 5347 | if (reg->dmin != ONIG_INFINITE_DISTANCE) {
|
---|
| 5348 | reg->threshold_len = (int )(reg->dmin + 1);
|
---|
| 5349 | }
|
---|
| 5350 | }
|
---|
| 5351 |
|
---|
| 5352 | static void
|
---|
| 5353 | set_sub_anchor(regex_t* reg, OptAncInfo* anc)
|
---|
| 5354 | {
|
---|
| 5355 | reg->sub_anchor |= anc->left_anchor & ANCHOR_BEGIN_LINE;
|
---|
| 5356 | reg->sub_anchor |= anc->right_anchor & ANCHOR_END_LINE;
|
---|
| 5357 | }
|
---|
| 5358 |
|
---|
#if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
/* Forward declaration; only present in the debug builds selected by
 * the two macros above. */
static void print_optimize_info(FILE* f, regex_t* reg);
#endif
|
---|
| 5362 |
|
---|
| 5363 | static int
|
---|
| 5364 | set_optimize_info_from_tree(Node* node, regex_t* reg, ScanEnv* scan_env)
|
---|
| 5365 | {
|
---|
| 5366 |
|
---|
| 5367 | int r;
|
---|
| 5368 | NodeOptInfo opt;
|
---|
| 5369 | OptEnv env;
|
---|
| 5370 |
|
---|
| 5371 | env.enc = reg->enc;
|
---|
| 5372 | env.options = reg->options;
|
---|
| 5373 | env.case_fold_flag = reg->case_fold_flag;
|
---|
| 5374 | env.scan_env = scan_env;
|
---|
| 5375 | clear_mml(&env.mmd);
|
---|
| 5376 |
|
---|
| 5377 | r = optimize_node_left(node, &opt, &env);
|
---|
| 5378 | if (r) return r;
|
---|
| 5379 |
|
---|
| 5380 | reg->anchor = opt.anc.left_anchor & (ANCHOR_BEGIN_BUF |
|
---|
| 5381 | ANCHOR_BEGIN_POSITION | ANCHOR_ANYCHAR_STAR | ANCHOR_ANYCHAR_STAR_ML |
|
---|
| 5382 | ANCHOR_LOOK_BEHIND);
|
---|
| 5383 |
|
---|
| 5384 | reg->anchor |= opt.anc.right_anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF |
|
---|
| 5385 | ANCHOR_PREC_READ_NOT);
|
---|
| 5386 |
|
---|
| 5387 | if (reg->anchor & (ANCHOR_END_BUF | ANCHOR_SEMI_END_BUF)) {
|
---|
| 5388 | reg->anchor_dmin = opt.len.min;
|
---|
| 5389 | reg->anchor_dmax = opt.len.max;
|
---|
| 5390 | }
|
---|
| 5391 |
|
---|
| 5392 | if (opt.exb.len > 0 || opt.exm.len > 0) {
|
---|
| 5393 | select_opt_exact_info(reg->enc, &opt.exb, &opt.exm);
|
---|
| 5394 | if (opt.map.value > 0 &&
|
---|
| 5395 | comp_opt_exact_or_map_info(&opt.exb, &opt.map) > 0) {
|
---|
| 5396 | goto set_map;
|
---|
| 5397 | }
|
---|
| 5398 | else {
|
---|
| 5399 | r = set_optimize_exact_info(reg, &opt.exb);
|
---|
| 5400 | set_sub_anchor(reg, &opt.exb.anc);
|
---|
| 5401 | }
|
---|
| 5402 | }
|
---|
| 5403 | else if (opt.map.value > 0) {
|
---|
| 5404 | set_map:
|
---|
| 5405 | set_optimize_map_info(reg, &opt.map);
|
---|
| 5406 | set_sub_anchor(reg, &opt.map.anc);
|
---|
| 5407 | }
|
---|
| 5408 | else {
|
---|
| 5409 | reg->sub_anchor |= opt.anc.left_anchor & ANCHOR_BEGIN_LINE;
|
---|
| 5410 | if (opt.len.max == 0)
|
---|
| 5411 | reg->sub_anchor |= opt.anc.right_anchor & ANCHOR_END_LINE;
|
---|
| 5412 | }
|
---|
| 5413 |
|
---|
| 5414 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
| 5415 | print_optimize_info(stderr, reg);
|
---|
| 5416 | #endif
|
---|
| 5417 | return r;
|
---|
| 5418 | }
|
---|
| 5419 |
|
---|
| 5420 | static void
|
---|
| 5421 | clear_optimize_info(regex_t* reg)
|
---|
| 5422 | {
|
---|
| 5423 | reg->optimize = ONIG_OPTIMIZE_NONE;
|
---|
| 5424 | reg->anchor = 0;
|
---|
| 5425 | reg->anchor_dmin = 0;
|
---|
| 5426 | reg->anchor_dmax = 0;
|
---|
| 5427 | reg->sub_anchor = 0;
|
---|
| 5428 | reg->exact_end = (UChar* )NULL;
|
---|
| 5429 | reg->threshold_len = 0;
|
---|
| 5430 | if (IS_NOT_NULL(reg->exact)) {
|
---|
| 5431 | xfree(reg->exact);
|
---|
| 5432 | reg->exact = (UChar* )NULL;
|
---|
| 5433 | }
|
---|
| 5434 | }
|
---|
| 5435 |
|
---|
#ifdef ONIG_DEBUG

/*
 * Debug helper: write the pattern bytes [s, end) to `fp` as
 * "PATTERN: /.../ (encoding-name)".
 *
 * For encodings whose minimum character length is greater than one byte
 * the pattern is decoded character by character; code points >= 0x80 are
 * shown in hexadecimal.  Otherwise the bytes are written unchanged.
 */
static void print_enc_string(FILE* fp, OnigEncoding enc,
                             const UChar *s, const UChar *end)
{
  const UChar *cur = s;

  fprintf(fp, "\nPATTERN: /");

  if (ONIGENC_MBC_MINLEN(enc) > 1) {
    for (; cur < end; cur += enclen(enc, cur)) {
      OnigCodePoint code = ONIGENC_MBC_TO_CODE(enc, cur, end);

      if (code < 0x80)
        fputc((int )code, fp);
      else
        fprintf(fp, " 0x%04x ", (int )code);
    }
  }
  else {
    for (; cur < end; cur++)
      fputc((int )*cur, fp);
  }

  fprintf(fp, "/ (%s)\n", enc->name);
}
#endif /* ONIG_DEBUG */
| 5470 |
|
---|
| 5471 | #if defined(ONIG_DEBUG_COMPILE) || defined(ONIG_DEBUG_MATCH)
|
---|
| 5472 | static void
|
---|
| 5473 | print_distance_range(FILE* f, OnigDistance a, OnigDistance b)
|
---|
| 5474 | {
|
---|
| 5475 | if (a == ONIG_INFINITE_DISTANCE)
|
---|
| 5476 | fputs("inf", f);
|
---|
| 5477 | else
|
---|
| 5478 | fprintf(f, "(%"PRIuPTR")", a);
|
---|
| 5479 |
|
---|
| 5480 | fputs("-", f);
|
---|
| 5481 |
|
---|
| 5482 | if (b == ONIG_INFINITE_DISTANCE)
|
---|
| 5483 | fputs("inf", f);
|
---|
| 5484 | else
|
---|
| 5485 | fprintf(f, "(%"PRIuPTR")", b);
|
---|
| 5486 | }
|
---|
| 5487 |
|
---|
| 5488 | static void
|
---|
| 5489 | print_anchor(FILE* f, int anchor)
|
---|
| 5490 | {
|
---|
| 5491 | int q = 0;
|
---|
| 5492 |
|
---|
| 5493 | fprintf(f, "[");
|
---|
| 5494 |
|
---|
| 5495 | if (anchor & ANCHOR_BEGIN_BUF) {
|
---|
| 5496 | fprintf(f, "begin-buf");
|
---|
| 5497 | q = 1;
|
---|
| 5498 | }
|
---|
| 5499 | if (anchor & ANCHOR_BEGIN_LINE) {
|
---|
| 5500 | if (q) fprintf(f, ", ");
|
---|
| 5501 | q = 1;
|
---|
| 5502 | fprintf(f, "begin-line");
|
---|
| 5503 | }
|
---|
| 5504 | if (anchor & ANCHOR_BEGIN_POSITION) {
|
---|
| 5505 | if (q) fprintf(f, ", ");
|
---|
| 5506 | q = 1;
|
---|
| 5507 | fprintf(f, "begin-pos");
|
---|
| 5508 | }
|
---|
| 5509 | if (anchor & ANCHOR_END_BUF) {
|
---|
| 5510 | if (q) fprintf(f, ", ");
|
---|
| 5511 | q = 1;
|
---|
| 5512 | fprintf(f, "end-buf");
|
---|
| 5513 | }
|
---|
| 5514 | if (anchor & ANCHOR_SEMI_END_BUF) {
|
---|
| 5515 | if (q) fprintf(f, ", ");
|
---|
| 5516 | q = 1;
|
---|
| 5517 | fprintf(f, "semi-end-buf");
|
---|
| 5518 | }
|
---|
| 5519 | if (anchor & ANCHOR_END_LINE) {
|
---|
| 5520 | if (q) fprintf(f, ", ");
|
---|
| 5521 | q = 1;
|
---|
| 5522 | fprintf(f, "end-line");
|
---|
| 5523 | }
|
---|
| 5524 | if (anchor & ANCHOR_ANYCHAR_STAR) {
|
---|
| 5525 | if (q) fprintf(f, ", ");
|
---|
| 5526 | q = 1;
|
---|
| 5527 | fprintf(f, "anychar-star");
|
---|
| 5528 | }
|
---|
| 5529 | if (anchor & ANCHOR_ANYCHAR_STAR_ML) {
|
---|
| 5530 | if (q) fprintf(f, ", ");
|
---|
| 5531 | fprintf(f, "anychar-star-ml");
|
---|
| 5532 | }
|
---|
| 5533 |
|
---|
| 5534 | fprintf(f, "]");
|
---|
| 5535 | }
|
---|
| 5536 |
|
---|
| 5537 | static void
|
---|
| 5538 | print_optimize_info(FILE* f, regex_t* reg)
|
---|
| 5539 | {
|
---|
| 5540 | static const char* on[] = { "NONE", "EXACT", "EXACT_BM", "EXACT_BM_NOT_REV",
|
---|
| 5541 | "EXACT_IC", "MAP",
|
---|
| 5542 | "EXACT_BM_IC", "EXACT_BM_NOT_REV_IC" };
|
---|
| 5543 |
|
---|
| 5544 | fprintf(f, "optimize: %s\n", on[reg->optimize]);
|
---|
| 5545 | fprintf(f, " anchor: "); print_anchor(f, reg->anchor);
|
---|
| 5546 | if ((reg->anchor & ANCHOR_END_BUF_MASK) != 0)
|
---|
| 5547 | print_distance_range(f, reg->anchor_dmin, reg->anchor_dmax);
|
---|
| 5548 | fprintf(f, "\n");
|
---|
| 5549 |
|
---|
| 5550 | if (reg->optimize) {
|
---|
| 5551 | fprintf(f, " sub anchor: "); print_anchor(f, reg->sub_anchor);
|
---|
| 5552 | fprintf(f, "\n");
|
---|
| 5553 | }
|
---|
| 5554 | fprintf(f, "\n");
|
---|
| 5555 |
|
---|
| 5556 | if (reg->exact) {
|
---|
| 5557 | UChar *p;
|
---|
| 5558 | fprintf(f, "exact: [");
|
---|
| 5559 | for (p = reg->exact; p < reg->exact_end; p++) {
|
---|
| 5560 | fputc(*p, f);
|
---|
| 5561 | }
|
---|
| 5562 | fprintf(f, "]: length: %"PRIdPTR"\n", (reg->exact_end - reg->exact));
|
---|
| 5563 | }
|
---|
| 5564 | else if (reg->optimize & ONIG_OPTIMIZE_MAP) {
|
---|
| 5565 | int c, i, n = 0;
|
---|
| 5566 |
|
---|
| 5567 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
|
---|
| 5568 | if (reg->map[i]) n++;
|
---|
| 5569 |
|
---|
| 5570 | fprintf(f, "map: n=%d\n", n);
|
---|
| 5571 | if (n > 0) {
|
---|
| 5572 | c = 0;
|
---|
| 5573 | fputc('[', f);
|
---|
| 5574 | for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++) {
|
---|
| 5575 | if (reg->map[i] != 0) {
|
---|
| 5576 | if (c > 0) fputs(", ", f);
|
---|
| 5577 | c++;
|
---|
| 5578 | if (ONIGENC_MBC_MAXLEN(reg->enc) == 1 &&
|
---|
| 5579 | ONIGENC_IS_CODE_PRINT(reg->enc, (OnigCodePoint )i))
|
---|
| 5580 | fputc(i, f);
|
---|
| 5581 | else
|
---|
| 5582 | fprintf(f, "%d", i);
|
---|
| 5583 | }
|
---|
| 5584 | }
|
---|
| 5585 | fprintf(f, "]\n");
|
---|
| 5586 | }
|
---|
| 5587 | }
|
---|
| 5588 | }
|
---|
| 5589 | #endif /* ONIG_DEBUG_COMPILE || ONIG_DEBUG_MATCH */
|
---|
| 5590 |
|
---|
| 5591 |
|
---|
| 5592 | extern void
|
---|
| 5593 | onig_free_body(regex_t* reg)
|
---|
| 5594 | {
|
---|
| 5595 | if (IS_NOT_NULL(reg)) {
|
---|
| 5596 | if (IS_NOT_NULL(reg->p)) xfree(reg->p);
|
---|
| 5597 | if (IS_NOT_NULL(reg->exact)) xfree(reg->exact);
|
---|
| 5598 | if (IS_NOT_NULL(reg->int_map)) xfree(reg->int_map);
|
---|
| 5599 | if (IS_NOT_NULL(reg->int_map_backward)) xfree(reg->int_map_backward);
|
---|
| 5600 | if (IS_NOT_NULL(reg->repeat_range)) xfree(reg->repeat_range);
|
---|
| 5601 | if (IS_NOT_NULL(reg->chain)) onig_free(reg->chain);
|
---|
| 5602 |
|
---|
| 5603 | #ifdef USE_NAMED_GROUP
|
---|
| 5604 | onig_names_free(reg);
|
---|
| 5605 | #endif
|
---|
| 5606 | }
|
---|
| 5607 | }
|
---|
| 5608 |
|
---|
| 5609 | extern void
|
---|
| 5610 | onig_free(regex_t* reg)
|
---|
| 5611 | {
|
---|
| 5612 | if (IS_NOT_NULL(reg)) {
|
---|
| 5613 | onig_free_body(reg);
|
---|
| 5614 | xfree(reg);
|
---|
| 5615 | }
|
---|
| 5616 | }
|
---|
| 5617 |
|
---|
| 5618 | size_t
|
---|
| 5619 | onig_memsize(const regex_t *reg)
|
---|
| 5620 | {
|
---|
| 5621 | size_t size = sizeof(regex_t);
|
---|
| 5622 | if (IS_NULL(reg)) return 0;
|
---|
| 5623 | if (IS_NOT_NULL(reg->p)) size += reg->alloc;
|
---|
| 5624 | if (IS_NOT_NULL(reg->exact)) size += reg->exact_end - reg->exact;
|
---|
| 5625 | if (IS_NOT_NULL(reg->int_map)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
| 5626 | if (IS_NOT_NULL(reg->int_map_backward)) size += sizeof(int) * ONIG_CHAR_TABLE_SIZE;
|
---|
| 5627 | if (IS_NOT_NULL(reg->repeat_range)) size += reg->repeat_range_alloc * sizeof(OnigRepeatRange);
|
---|
| 5628 | if (IS_NOT_NULL(reg->chain)) size += onig_memsize(reg->chain);
|
---|
| 5629 |
|
---|
| 5630 | return size;
|
---|
| 5631 | }
|
---|
| 5632 |
|
---|
| 5633 | size_t
|
---|
| 5634 | onig_region_memsize(const OnigRegion *regs)
|
---|
| 5635 | {
|
---|
| 5636 | size_t size = sizeof(*regs);
|
---|
| 5637 | if (IS_NULL(regs)) return 0;
|
---|
| 5638 | size += regs->allocated * (sizeof(*regs->beg) + sizeof(*regs->end));
|
---|
| 5639 | return size;
|
---|
| 5640 | }
|
---|
| 5641 |
|
---|
| 5642 | #define REGEX_TRANSFER(to,from) do {\
|
---|
| 5643 | (to)->state = ONIG_STATE_MODIFY;\
|
---|
| 5644 | onig_free_body(to);\
|
---|
| 5645 | xmemcpy(to, from, sizeof(regex_t));\
|
---|
| 5646 | xfree(from);\
|
---|
| 5647 | } while (0)
|
---|
| 5648 |
|
---|
| 5649 | extern void
|
---|
| 5650 | onig_transfer(regex_t* to, regex_t* from)
|
---|
| 5651 | {
|
---|
| 5652 | THREAD_ATOMIC_START;
|
---|
| 5653 | REGEX_TRANSFER(to, from);
|
---|
| 5654 | THREAD_ATOMIC_END;
|
---|
| 5655 | }
|
---|
| 5656 |
|
---|
| 5657 | #define REGEX_CHAIN_HEAD(reg) do {\
|
---|
| 5658 | while (IS_NOT_NULL((reg)->chain)) {\
|
---|
| 5659 | (reg) = (reg)->chain;\
|
---|
| 5660 | }\
|
---|
| 5661 | } while (0)
|
---|
| 5662 |
|
---|
| 5663 | extern void
|
---|
| 5664 | onig_chain_link_add(regex_t* to, regex_t* add)
|
---|
| 5665 | {
|
---|
| 5666 | THREAD_ATOMIC_START;
|
---|
| 5667 | REGEX_CHAIN_HEAD(to);
|
---|
| 5668 | to->chain = add;
|
---|
| 5669 | THREAD_ATOMIC_END;
|
---|
| 5670 | }
|
---|
| 5671 |
|
---|
| 5672 | extern void
|
---|
| 5673 | onig_chain_reduce(regex_t* reg)
|
---|
| 5674 | {
|
---|
| 5675 | regex_t *head, *prev;
|
---|
| 5676 |
|
---|
| 5677 | prev = reg;
|
---|
| 5678 | head = prev->chain;
|
---|
| 5679 | if (IS_NOT_NULL(head)) {
|
---|
| 5680 | reg->state = ONIG_STATE_MODIFY;
|
---|
| 5681 | while (IS_NOT_NULL(head->chain)) {
|
---|
| 5682 | prev = head;
|
---|
| 5683 | head = head->chain;
|
---|
| 5684 | }
|
---|
| 5685 | prev->chain = (regex_t* )NULL;
|
---|
| 5686 | REGEX_TRANSFER(reg, head);
|
---|
| 5687 | }
|
---|
| 5688 | }
|
---|
| 5689 |
|
---|
| 5690 | #ifdef ONIG_DEBUG_COMPILE
|
---|
| 5691 | static void print_compiled_byte_code_list P_((FILE* f, regex_t* reg));
|
---|
| 5692 | #endif
|
---|
| 5693 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
| 5694 | static void print_tree P_((FILE* f, Node* node));
|
---|
| 5695 | #endif
|
---|
| 5696 |
|
---|
| 5697 | extern int
|
---|
| 5698 | onig_compile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
|
---|
| 5699 | OnigErrorInfo* einfo)
|
---|
| 5700 | {
|
---|
| 5701 | #define COMPILE_INIT_SIZE 20
|
---|
| 5702 |
|
---|
| 5703 | int r;
|
---|
| 5704 | OnigDistance init_size;
|
---|
| 5705 | Node* root;
|
---|
| 5706 | ScanEnv scan_env = {0};
|
---|
| 5707 | #ifdef USE_SUBEXP_CALL
|
---|
| 5708 | UnsetAddrList uslist;
|
---|
| 5709 | #endif
|
---|
| 5710 |
|
---|
| 5711 | if (IS_NOT_NULL(einfo)) einfo->par = (UChar* )NULL;
|
---|
| 5712 |
|
---|
| 5713 | reg->state = ONIG_STATE_COMPILING;
|
---|
| 5714 |
|
---|
| 5715 | #ifdef ONIG_DEBUG
|
---|
| 5716 | print_enc_string(stderr, reg->enc, pattern, pattern_end);
|
---|
| 5717 | #endif
|
---|
| 5718 |
|
---|
| 5719 | if (reg->alloc == 0) {
|
---|
| 5720 | init_size = (pattern_end - pattern) * 2;
|
---|
| 5721 | if (init_size <= 0) init_size = COMPILE_INIT_SIZE;
|
---|
| 5722 | r = BBUF_INIT(reg, init_size);
|
---|
| 5723 | if (r != 0) goto end;
|
---|
| 5724 | }
|
---|
| 5725 | else
|
---|
| 5726 | reg->used = 0;
|
---|
| 5727 |
|
---|
| 5728 | reg->num_mem = 0;
|
---|
| 5729 | reg->num_repeat = 0;
|
---|
| 5730 | reg->num_null_check = 0;
|
---|
| 5731 | reg->repeat_range_alloc = 0;
|
---|
| 5732 | reg->repeat_range = (OnigRepeatRange* )NULL;
|
---|
| 5733 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 5734 | reg->num_comb_exp_check = 0;
|
---|
| 5735 | #endif
|
---|
| 5736 |
|
---|
| 5737 | r = onig_parse_make_tree(&root, pattern, pattern_end, reg, &scan_env);
|
---|
| 5738 | if (r != 0) goto err;
|
---|
| 5739 |
|
---|
| 5740 | #ifdef USE_NAMED_GROUP
|
---|
| 5741 | /* mixed use named group and no-named group */
|
---|
| 5742 | if (scan_env.num_named > 0 &&
|
---|
| 5743 | IS_SYNTAX_BV(scan_env.syntax, ONIG_SYN_CAPTURE_ONLY_NAMED_GROUP) &&
|
---|
| 5744 | !ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
| 5745 | if (scan_env.num_named != scan_env.num_mem)
|
---|
| 5746 | r = disable_noname_group_capture(&root, reg, &scan_env);
|
---|
| 5747 | else
|
---|
| 5748 | r = numbered_ref_check(root);
|
---|
| 5749 |
|
---|
| 5750 | if (r != 0) goto err;
|
---|
| 5751 | }
|
---|
| 5752 | #endif
|
---|
| 5753 |
|
---|
| 5754 | #ifdef USE_SUBEXP_CALL
|
---|
| 5755 | if (scan_env.num_call > 0) {
|
---|
| 5756 | r = unset_addr_list_init(&uslist, scan_env.num_call);
|
---|
| 5757 | if (r != 0) goto err;
|
---|
| 5758 | scan_env.unset_addr_list = &uslist;
|
---|
| 5759 | r = setup_subexp_call(root, &scan_env);
|
---|
| 5760 | if (r != 0) goto err_unset;
|
---|
| 5761 | r = subexp_recursive_check_trav(root, &scan_env);
|
---|
| 5762 | if (r < 0) goto err_unset;
|
---|
| 5763 | r = subexp_inf_recursive_check_trav(root, &scan_env);
|
---|
| 5764 | if (r != 0) goto err_unset;
|
---|
| 5765 |
|
---|
| 5766 | reg->num_call = scan_env.num_call;
|
---|
| 5767 | }
|
---|
| 5768 | else
|
---|
| 5769 | reg->num_call = 0;
|
---|
| 5770 | #endif
|
---|
| 5771 |
|
---|
| 5772 | r = setup_tree(root, reg, IN_ROOT, &scan_env);
|
---|
| 5773 | if (r != 0) goto err_unset;
|
---|
| 5774 |
|
---|
| 5775 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
| 5776 | print_tree(stderr, root);
|
---|
| 5777 | #endif
|
---|
| 5778 |
|
---|
| 5779 | reg->capture_history = scan_env.capture_history;
|
---|
| 5780 | reg->bt_mem_start = scan_env.bt_mem_start;
|
---|
| 5781 | reg->bt_mem_start |= reg->capture_history;
|
---|
| 5782 | if (IS_FIND_CONDITION(reg->options))
|
---|
| 5783 | BIT_STATUS_ON_ALL(reg->bt_mem_end);
|
---|
| 5784 | else {
|
---|
| 5785 | reg->bt_mem_end = scan_env.bt_mem_end;
|
---|
| 5786 | reg->bt_mem_end |= reg->capture_history;
|
---|
| 5787 | }
|
---|
| 5788 |
|
---|
| 5789 | #ifdef USE_COMBINATION_EXPLOSION_CHECK
|
---|
| 5790 | if (scan_env.backrefed_mem == 0
|
---|
| 5791 | #ifdef USE_SUBEXP_CALL
|
---|
| 5792 | || scan_env.num_call == 0
|
---|
| 5793 | #endif
|
---|
| 5794 | ) {
|
---|
| 5795 | setup_comb_exp_check(root, 0, &scan_env);
|
---|
| 5796 | #ifdef USE_SUBEXP_CALL
|
---|
| 5797 | if (scan_env.has_recursion != 0) {
|
---|
| 5798 | scan_env.num_comb_exp_check = 0;
|
---|
| 5799 | }
|
---|
| 5800 | else
|
---|
| 5801 | #endif
|
---|
| 5802 | if (scan_env.comb_exp_max_regnum > 0) {
|
---|
| 5803 | int i;
|
---|
| 5804 | for (i = 1; i <= scan_env.comb_exp_max_regnum; i++) {
|
---|
| 5805 | if (BIT_STATUS_AT(scan_env.backrefed_mem, i) != 0) {
|
---|
| 5806 | scan_env.num_comb_exp_check = 0;
|
---|
| 5807 | break;
|
---|
| 5808 | }
|
---|
| 5809 | }
|
---|
| 5810 | }
|
---|
| 5811 | }
|
---|
| 5812 |
|
---|
| 5813 | reg->num_comb_exp_check = scan_env.num_comb_exp_check;
|
---|
| 5814 | #endif
|
---|
| 5815 |
|
---|
| 5816 | clear_optimize_info(reg);
|
---|
| 5817 | #ifndef ONIG_DONT_OPTIMIZE
|
---|
| 5818 | r = set_optimize_info_from_tree(root, reg, &scan_env);
|
---|
| 5819 | if (r != 0) goto err_unset;
|
---|
| 5820 | #endif
|
---|
| 5821 |
|
---|
| 5822 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic)) {
|
---|
| 5823 | xfree(scan_env.mem_nodes_dynamic);
|
---|
| 5824 | scan_env.mem_nodes_dynamic = (Node** )NULL;
|
---|
| 5825 | }
|
---|
| 5826 |
|
---|
| 5827 | r = compile_tree(root, reg);
|
---|
| 5828 | if (r == 0) {
|
---|
| 5829 | r = add_opcode(reg, OP_END);
|
---|
| 5830 | #ifdef USE_SUBEXP_CALL
|
---|
| 5831 | if (scan_env.num_call > 0) {
|
---|
| 5832 | r = unset_addr_list_fix(&uslist, reg);
|
---|
| 5833 | unset_addr_list_end(&uslist);
|
---|
| 5834 | if (r) goto err;
|
---|
| 5835 | }
|
---|
| 5836 | #endif
|
---|
| 5837 |
|
---|
| 5838 | if ((reg->num_repeat != 0) || (reg->bt_mem_end != 0))
|
---|
| 5839 | reg->stack_pop_level = STACK_POP_LEVEL_ALL;
|
---|
| 5840 | else {
|
---|
| 5841 | if (reg->bt_mem_start != 0)
|
---|
| 5842 | reg->stack_pop_level = STACK_POP_LEVEL_MEM_START;
|
---|
| 5843 | else
|
---|
| 5844 | reg->stack_pop_level = STACK_POP_LEVEL_FREE;
|
---|
| 5845 | }
|
---|
| 5846 | }
|
---|
| 5847 | #ifdef USE_SUBEXP_CALL
|
---|
| 5848 | else if (scan_env.num_call > 0) {
|
---|
| 5849 | unset_addr_list_end(&uslist);
|
---|
| 5850 | }
|
---|
| 5851 | #endif
|
---|
| 5852 | onig_node_free(root);
|
---|
| 5853 |
|
---|
| 5854 | #ifdef ONIG_DEBUG_COMPILE
|
---|
| 5855 | #ifdef USE_NAMED_GROUP
|
---|
| 5856 | onig_print_names(stderr, reg);
|
---|
| 5857 | #endif
|
---|
| 5858 | print_compiled_byte_code_list(stderr, reg);
|
---|
| 5859 | #endif
|
---|
| 5860 |
|
---|
| 5861 | end:
|
---|
| 5862 | reg->state = ONIG_STATE_NORMAL;
|
---|
| 5863 | return r;
|
---|
| 5864 |
|
---|
| 5865 | err_unset:
|
---|
| 5866 | #ifdef USE_SUBEXP_CALL
|
---|
| 5867 | if (scan_env.num_call > 0) {
|
---|
| 5868 | unset_addr_list_end(&uslist);
|
---|
| 5869 | }
|
---|
| 5870 | #endif
|
---|
| 5871 | err:
|
---|
| 5872 | if (IS_NOT_NULL(scan_env.error)) {
|
---|
| 5873 | if (IS_NOT_NULL(einfo)) {
|
---|
| 5874 | einfo->enc = scan_env.enc;
|
---|
| 5875 | einfo->par = scan_env.error;
|
---|
| 5876 | einfo->par_end = scan_env.error_end;
|
---|
| 5877 | }
|
---|
| 5878 | }
|
---|
| 5879 |
|
---|
| 5880 | onig_node_free(root);
|
---|
| 5881 | if (IS_NOT_NULL(scan_env.mem_nodes_dynamic))
|
---|
| 5882 | xfree(scan_env.mem_nodes_dynamic);
|
---|
| 5883 | return r;
|
---|
| 5884 | }
|
---|
| 5885 |
|
---|
#ifdef USE_RECOMPILE_API
/*
 * Compile a new pattern and make it the definition of `reg`.
 *
 * A fresh regex is built with onig_new().  When `reg` is in
 * ONIG_STATE_NORMAL its contents are replaced right away
 * (onig_transfer); otherwise the new regex is appended to the chain of
 * `reg` (onig_chain_link_add).
 *
 * Returns 0 on success or the error code of onig_new(), in which case
 * `reg` is left untouched.
 */
extern int
onig_recompile(regex_t* reg, const UChar* pattern, const UChar* pattern_end,
               OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
               OnigErrorInfo* einfo)
{
  regex_t *fresh;
  int r = onig_new(&fresh, pattern, pattern_end, option, enc, syntax, einfo);

  if (r != 0)
    return r;

  if (ONIG_STATE(reg) != ONIG_STATE_NORMAL)
    onig_chain_link_add(reg, fresh);
  else
    onig_transfer(reg, fresh);

  return 0;
}
#endif
| 5906 |
|
---|
| 5907 | static int onig_inited = 0;
|
---|
| 5908 |
|
---|
| 5909 | extern int
|
---|
| 5910 | onig_reg_init(regex_t* reg, OnigOptionType option,
|
---|
| 5911 | OnigCaseFoldType case_fold_flag,
|
---|
| 5912 | OnigEncoding enc, OnigSyntaxType* syntax)
|
---|
| 5913 | {
|
---|
| 5914 | if (! onig_inited)
|
---|
| 5915 | onig_init();
|
---|
| 5916 |
|
---|
| 5917 | if (IS_NULL(reg))
|
---|
| 5918 | return ONIGERR_INVALID_ARGUMENT;
|
---|
| 5919 |
|
---|
| 5920 | if (ONIGENC_IS_UNDEF(enc))
|
---|
| 5921 | return ONIGERR_DEFAULT_ENCODING_IS_NOT_SET;
|
---|
| 5922 |
|
---|
| 5923 | if ((option & (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP))
|
---|
| 5924 | == (ONIG_OPTION_DONT_CAPTURE_GROUP|ONIG_OPTION_CAPTURE_GROUP)) {
|
---|
| 5925 | return ONIGERR_INVALID_COMBINATION_OF_OPTIONS;
|
---|
| 5926 | }
|
---|
| 5927 |
|
---|
| 5928 | (reg)->state = ONIG_STATE_MODIFY;
|
---|
| 5929 |
|
---|
| 5930 | if ((option & ONIG_OPTION_NEGATE_SINGLELINE) != 0) {
|
---|
| 5931 | option |= syntax->options;
|
---|
| 5932 | option &= ~ONIG_OPTION_SINGLELINE;
|
---|
| 5933 | }
|
---|
| 5934 | else
|
---|
| 5935 | option |= syntax->options;
|
---|
| 5936 |
|
---|
| 5937 | (reg)->enc = enc;
|
---|
| 5938 | (reg)->options = option;
|
---|
| 5939 | (reg)->syntax = syntax;
|
---|
| 5940 | (reg)->optimize = 0;
|
---|
| 5941 | (reg)->exact = (UChar* )NULL;
|
---|
| 5942 | (reg)->int_map = (int* )NULL;
|
---|
| 5943 | (reg)->int_map_backward = (int* )NULL;
|
---|
| 5944 | (reg)->chain = (regex_t* )NULL;
|
---|
| 5945 |
|
---|
| 5946 | (reg)->p = (UChar* )NULL;
|
---|
| 5947 | (reg)->alloc = 0;
|
---|
| 5948 | (reg)->used = 0;
|
---|
| 5949 | (reg)->name_table = (void* )NULL;
|
---|
| 5950 |
|
---|
| 5951 | (reg)->case_fold_flag = case_fold_flag;
|
---|
| 5952 | return 0;
|
---|
| 5953 | }
|
---|
| 5954 |
|
---|
| 5955 | extern int
|
---|
| 5956 | onig_new_without_alloc(regex_t* reg, const UChar* pattern,
|
---|
| 5957 | const UChar* pattern_end, OnigOptionType option, OnigEncoding enc,
|
---|
| 5958 | OnigSyntaxType* syntax, OnigErrorInfo* einfo)
|
---|
| 5959 | {
|
---|
| 5960 | int r;
|
---|
| 5961 |
|
---|
| 5962 | r = onig_reg_init(reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
| 5963 | if (r) return r;
|
---|
| 5964 |
|
---|
| 5965 | r = onig_compile(reg, pattern, pattern_end, einfo);
|
---|
| 5966 | return r;
|
---|
| 5967 | }
|
---|
| 5968 |
|
---|
| 5969 | extern int
|
---|
| 5970 | onig_new(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
---|
| 5971 | OnigOptionType option, OnigEncoding enc, OnigSyntaxType* syntax,
|
---|
| 5972 | OnigErrorInfo* einfo)
|
---|
| 5973 | {
|
---|
| 5974 | int r;
|
---|
| 5975 |
|
---|
| 5976 | *reg = (regex_t* )xmalloc(sizeof(regex_t));
|
---|
| 5977 | if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
---|
| 5978 |
|
---|
| 5979 | r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
---|
| 5980 | if (r) goto err;
|
---|
| 5981 |
|
---|
| 5982 | r = onig_compile(*reg, pattern, pattern_end, einfo);
|
---|
| 5983 | if (r) {
|
---|
| 5984 | err:
|
---|
| 5985 | onig_free(*reg);
|
---|
| 5986 | *reg = NULL;
|
---|
| 5987 | }
|
---|
| 5988 | return r;
|
---|
| 5989 | }
|
---|
| 5990 |
|
---|
| 5991 |
|
---|
| 5992 | extern int
|
---|
| 5993 | onig_init(void)
|
---|
| 5994 | {
|
---|
| 5995 | if (onig_inited != 0)
|
---|
| 5996 | return 0;
|
---|
| 5997 |
|
---|
| 5998 | THREAD_SYSTEM_INIT;
|
---|
| 5999 | THREAD_ATOMIC_START;
|
---|
| 6000 |
|
---|
| 6001 | onig_inited = 1;
|
---|
| 6002 |
|
---|
| 6003 | onigenc_init();
|
---|
| 6004 | /* onigenc_set_default_caseconv_table((UChar* )0); */
|
---|
| 6005 |
|
---|
| 6006 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
| 6007 | onig_statistics_init();
|
---|
| 6008 | #endif
|
---|
| 6009 |
|
---|
| 6010 | THREAD_ATOMIC_END;
|
---|
| 6011 | return 0;
|
---|
| 6012 | }
|
---|
| 6013 |
|
---|
| 6014 |
|
---|
| 6015 | static OnigEndCallListItemType* EndCallTop;
|
---|
| 6016 |
|
---|
| 6017 | extern void onig_add_end_call(void (*func)(void))
|
---|
| 6018 | {
|
---|
| 6019 | OnigEndCallListItemType* item;
|
---|
| 6020 |
|
---|
| 6021 | item = (OnigEndCallListItemType* )xmalloc(sizeof(*item));
|
---|
| 6022 | if (item == 0) return ;
|
---|
| 6023 |
|
---|
| 6024 | item->next = EndCallTop;
|
---|
| 6025 | item->func = func;
|
---|
| 6026 |
|
---|
| 6027 | EndCallTop = item;
|
---|
| 6028 | }
|
---|
| 6029 |
|
---|
| 6030 | static void
|
---|
| 6031 | exec_end_call_list(void)
|
---|
| 6032 | {
|
---|
| 6033 | OnigEndCallListItemType* prev;
|
---|
| 6034 | void (*func)(void);
|
---|
| 6035 |
|
---|
| 6036 | while (EndCallTop != 0) {
|
---|
| 6037 | func = EndCallTop->func;
|
---|
| 6038 | (*func)();
|
---|
| 6039 |
|
---|
| 6040 | prev = EndCallTop;
|
---|
| 6041 | EndCallTop = EndCallTop->next;
|
---|
| 6042 | xfree(prev);
|
---|
| 6043 | }
|
---|
| 6044 | }
|
---|
| 6045 |
|
---|
| 6046 | extern int
|
---|
| 6047 | onig_end(void)
|
---|
| 6048 | {
|
---|
| 6049 | THREAD_ATOMIC_START;
|
---|
| 6050 |
|
---|
| 6051 | exec_end_call_list();
|
---|
| 6052 |
|
---|
| 6053 | #ifdef ONIG_DEBUG_STATISTICS
|
---|
| 6054 | onig_print_statistics(stderr);
|
---|
| 6055 | #endif
|
---|
| 6056 |
|
---|
| 6057 | #ifdef USE_SHARED_CCLASS_TABLE
|
---|
| 6058 | onig_free_shared_cclass_table();
|
---|
| 6059 | #endif
|
---|
| 6060 |
|
---|
| 6061 | #ifdef USE_PARSE_TREE_NODE_RECYCLE
|
---|
| 6062 | onig_free_node_list();
|
---|
| 6063 | #endif
|
---|
| 6064 |
|
---|
| 6065 | onig_inited = 0;
|
---|
| 6066 |
|
---|
| 6067 | THREAD_ATOMIC_END;
|
---|
| 6068 | THREAD_SYSTEM_END;
|
---|
| 6069 | return 0;
|
---|
| 6070 | }
|
---|
| 6071 |
|
---|
| 6072 | extern int
|
---|
| 6073 | onig_is_in_code_range(const UChar* p, OnigCodePoint code)
|
---|
| 6074 | {
|
---|
| 6075 | OnigCodePoint n, *data;
|
---|
| 6076 | OnigCodePoint low, high, x;
|
---|
| 6077 |
|
---|
| 6078 | GET_CODE_POINT(n, p);
|
---|
| 6079 | data = (OnigCodePoint* )p;
|
---|
| 6080 | data++;
|
---|
| 6081 |
|
---|
| 6082 | for (low = 0, high = n; low < high; ) {
|
---|
| 6083 | x = (low + high) >> 1;
|
---|
| 6084 | if (code > data[x * 2 + 1])
|
---|
| 6085 | low = x + 1;
|
---|
| 6086 | else
|
---|
| 6087 | high = x;
|
---|
| 6088 | }
|
---|
| 6089 |
|
---|
| 6090 | return ((low < n && code >= data[low * 2]) ? 1 : 0);
|
---|
| 6091 | }
|
---|
| 6092 |
|
---|
| 6093 | extern int
|
---|
| 6094 | onig_is_code_in_cc_len(int elen, OnigCodePoint code, CClassNode* cc)
|
---|
| 6095 | {
|
---|
| 6096 | int found;
|
---|
| 6097 |
|
---|
| 6098 | if (elen > 1 || (code >= SINGLE_BYTE_SIZE)) {
|
---|
| 6099 | if (IS_NULL(cc->mbuf)) {
|
---|
| 6100 | found = 0;
|
---|
| 6101 | }
|
---|
| 6102 | else {
|
---|
| 6103 | found = (onig_is_in_code_range(cc->mbuf->p, code) != 0 ? 1 : 0);
|
---|
| 6104 | }
|
---|
| 6105 | }
|
---|
| 6106 | else {
|
---|
| 6107 | found = (BITSET_AT(cc->bs, code) == 0 ? 0 : 1);
|
---|
| 6108 | }
|
---|
| 6109 |
|
---|
| 6110 | if (IS_NCCLASS_NOT(cc))
|
---|
| 6111 | return !found;
|
---|
| 6112 | else
|
---|
| 6113 | return found;
|
---|
| 6114 | }
|
---|
| 6115 |
|
---|
| 6116 | extern int
|
---|
| 6117 | onig_is_code_in_cc(OnigEncoding enc, OnigCodePoint code, CClassNode* cc)
|
---|
| 6118 | {
|
---|
| 6119 | int len;
|
---|
| 6120 |
|
---|
| 6121 | if (ONIGENC_MBC_MINLEN(enc) > 1) {
|
---|
| 6122 | len = 2;
|
---|
| 6123 | }
|
---|
| 6124 | else {
|
---|
| 6125 | len = ONIGENC_CODE_TO_MBCLEN(enc, code);
|
---|
| 6126 | }
|
---|
| 6127 | return onig_is_code_in_cc_len(len, code, cc);
|
---|
| 6128 | }
|
---|
| 6129 |
|
---|
| 6130 |
|
---|
| 6131 | #ifdef ONIG_DEBUG
|
---|
| 6132 |
|
---|
/* Argument kinds stored in the third member of each OnigOpInfo[] entry. */
#define ARG_SPECIAL      (-1)
#define ARG_NON          0
#define ARG_RELADDR      1
#define ARG_ABSADDR      2
#define ARG_LENGTH       3
#define ARG_MEMNUM       4
#define ARG_OPTION       5
#define ARG_STATE_CHECK  6
| 6142 |
|
---|
| 6143 | OnigOpInfoType OnigOpInfo[] = {
|
---|
| 6144 | { OP_FINISH, "finish", ARG_NON },
|
---|
| 6145 | { OP_END, "end", ARG_NON },
|
---|
| 6146 | { OP_EXACT1, "exact1", ARG_SPECIAL },
|
---|
| 6147 | { OP_EXACT2, "exact2", ARG_SPECIAL },
|
---|
| 6148 | { OP_EXACT3, "exact3", ARG_SPECIAL },
|
---|
| 6149 | { OP_EXACT4, "exact4", ARG_SPECIAL },
|
---|
| 6150 | { OP_EXACT5, "exact5", ARG_SPECIAL },
|
---|
| 6151 | { OP_EXACTN, "exactn", ARG_SPECIAL },
|
---|
| 6152 | { OP_EXACTMB2N1, "exactmb2-n1", ARG_SPECIAL },
|
---|
| 6153 | { OP_EXACTMB2N2, "exactmb2-n2", ARG_SPECIAL },
|
---|
| 6154 | { OP_EXACTMB2N3, "exactmb2-n3", ARG_SPECIAL },
|
---|
| 6155 | { OP_EXACTMB2N, "exactmb2-n", ARG_SPECIAL },
|
---|
| 6156 | { OP_EXACTMB3N, "exactmb3n" , ARG_SPECIAL },
|
---|
| 6157 | { OP_EXACTMBN, "exactmbn", ARG_SPECIAL },
|
---|
| 6158 | { OP_EXACT1_IC, "exact1-ic", ARG_SPECIAL },
|
---|
| 6159 | { OP_EXACTN_IC, "exactn-ic", ARG_SPECIAL },
|
---|
| 6160 | { OP_CCLASS, "cclass", ARG_SPECIAL },
|
---|
| 6161 | { OP_CCLASS_MB, "cclass-mb", ARG_SPECIAL },
|
---|
| 6162 | { OP_CCLASS_MIX, "cclass-mix", ARG_SPECIAL },
|
---|
| 6163 | { OP_CCLASS_NOT, "cclass-not", ARG_SPECIAL },
|
---|
| 6164 | { OP_CCLASS_MB_NOT, "cclass-mb-not", ARG_SPECIAL },
|
---|
| 6165 | { OP_CCLASS_MIX_NOT, "cclass-mix-not", ARG_SPECIAL },
|
---|
| 6166 | { OP_CCLASS_NODE, "cclass-node", ARG_SPECIAL },
|
---|
| 6167 | { OP_ANYCHAR, "anychar", ARG_NON },
|
---|
| 6168 | { OP_ANYCHAR_ML, "anychar-ml", ARG_NON },
|
---|
| 6169 | { OP_ANYCHAR_STAR, "anychar*", ARG_NON },
|
---|
| 6170 | { OP_ANYCHAR_ML_STAR, "anychar-ml*", ARG_NON },
|
---|
| 6171 | { OP_ANYCHAR_STAR_PEEK_NEXT, "anychar*-peek-next", ARG_SPECIAL },
|
---|
| 6172 | { OP_ANYCHAR_ML_STAR_PEEK_NEXT, "anychar-ml*-peek-next", ARG_SPECIAL },
|
---|
| 6173 | { OP_WORD, "word", ARG_NON },
|
---|
| 6174 | { OP_NOT_WORD, "not-word", ARG_NON },
|
---|
| 6175 | { OP_WORD_BOUND, "word-bound", ARG_NON },
|
---|
| 6176 | { OP_NOT_WORD_BOUND, "not-word-bound", ARG_NON },
|
---|
| 6177 | { OP_WORD_BEGIN, "word-begin", ARG_NON },
|
---|
| 6178 | { OP_WORD_END, "word-end", ARG_NON },
|
---|
| 6179 | { OP_ASCII_WORD, "ascii-word", ARG_NON },
|
---|
| 6180 | { OP_NOT_ASCII_WORD, "not-ascii-word", ARG_NON },
|
---|
| 6181 | { OP_ASCII_WORD_BOUND, "ascii-word-bound", ARG_NON },
|
---|
| 6182 | { OP_NOT_ASCII_WORD_BOUND,"not-ascii-word-bound", ARG_NON },
|
---|
| 6183 | { OP_ASCII_WORD_BEGIN, "ascii-word-begin", ARG_NON },
|
---|
| 6184 | { OP_ASCII_WORD_END, "ascii-word-end", ARG_NON },
|
---|
| 6185 | { OP_BEGIN_BUF, "begin-buf", ARG_NON },
|
---|
| 6186 | { OP_END_BUF, "end-buf", ARG_NON },
|
---|
| 6187 | { OP_BEGIN_LINE, "begin-line", ARG_NON },
|
---|
| 6188 | { OP_END_LINE, "end-line", ARG_NON },
|
---|
| 6189 | { OP_SEMI_END_BUF, "semi-end-buf", ARG_NON },
|
---|
| 6190 | { OP_BEGIN_POSITION, "begin-position", ARG_NON },
|
---|
| 6191 | { OP_BEGIN_POS_OR_LINE, "begin-pos-or-line", ARG_NON },
|
---|
| 6192 | { OP_BACKREF1, "backref1", ARG_NON },
|
---|
| 6193 | { OP_BACKREF2, "backref2", ARG_NON },
|
---|
| 6194 | { OP_BACKREFN, "backrefn", ARG_MEMNUM },
|
---|
| 6195 | { OP_BACKREFN_IC, "backrefn-ic", ARG_SPECIAL },
|
---|
| 6196 | { OP_BACKREF_MULTI, "backref_multi", ARG_SPECIAL },
|
---|
| 6197 | { OP_BACKREF_MULTI_IC, "backref_multi-ic", ARG_SPECIAL },
|
---|
| 6198 | { OP_BACKREF_WITH_LEVEL, "backref_at_level", ARG_SPECIAL },
|
---|
| 6199 | { OP_MEMORY_START_PUSH, "mem-start-push", ARG_MEMNUM },
|
---|
| 6200 | { OP_MEMORY_START, "mem-start", ARG_MEMNUM },
|
---|
| 6201 | { OP_MEMORY_END_PUSH, "mem-end-push", ARG_MEMNUM },
|
---|
| 6202 | { OP_MEMORY_END_PUSH_REC, "mem-end-push-rec", ARG_MEMNUM },
|
---|
| 6203 | { OP_MEMORY_END, "mem-end", ARG_MEMNUM },
|
---|
| 6204 | { OP_MEMORY_END_REC, "mem-end-rec", ARG_MEMNUM },
|
---|
| 6205 | { OP_SET_OPTION_PUSH, "set-option-push", ARG_OPTION },
|
---|
| 6206 | { OP_SET_OPTION, "set-option", ARG_OPTION },
|
---|
| 6207 | { OP_KEEP, "keep", ARG_NON },
|
---|
| 6208 | { OP_FAIL, "fail", ARG_NON },
|
---|
| 6209 | { OP_JUMP, "jump", ARG_RELADDR },
|
---|
| 6210 | { OP_PUSH, "push", ARG_RELADDR },
|
---|
| 6211 | { OP_POP, "pop", ARG_NON },
|
---|
| 6212 | { OP_PUSH_OR_JUMP_EXACT1, "push-or-jump-e1", ARG_SPECIAL },
|
---|
| 6213 | { OP_PUSH_IF_PEEK_NEXT, "push-if-peek-next", ARG_SPECIAL },
|
---|
| 6214 | { OP_REPEAT, "repeat", ARG_SPECIAL },
|
---|
| 6215 | { OP_REPEAT_NG, "repeat-ng", ARG_SPECIAL },
|
---|
| 6216 | { OP_REPEAT_INC, "repeat-inc", ARG_MEMNUM },
|
---|
| 6217 | { OP_REPEAT_INC_NG, "repeat-inc-ng", ARG_MEMNUM },
|
---|
| 6218 | { OP_REPEAT_INC_SG, "repeat-inc-sg", ARG_MEMNUM },
|
---|
| 6219 | { OP_REPEAT_INC_NG_SG, "repeat-inc-ng-sg", ARG_MEMNUM },
|
---|
| 6220 | { OP_NULL_CHECK_START, "null-check-start", ARG_MEMNUM },
|
---|
| 6221 | { OP_NULL_CHECK_END, "null-check-end", ARG_MEMNUM },
|
---|
| 6222 | { OP_NULL_CHECK_END_MEMST,"null-check-end-memst", ARG_MEMNUM },
|
---|
| 6223 | { OP_NULL_CHECK_END_MEMST_PUSH,"null-check-end-memst-push", ARG_MEMNUM },
|
---|
| 6224 | { OP_PUSH_POS, "push-pos", ARG_NON },
|
---|
| 6225 | { OP_POP_POS, "pop-pos", ARG_NON },
|
---|
| 6226 | { OP_PUSH_POS_NOT, "push-pos-not", ARG_RELADDR },
|
---|
| 6227 | { OP_FAIL_POS, "fail-pos", ARG_NON },
|
---|
| 6228 | { OP_PUSH_STOP_BT, "push-stop-bt", ARG_NON },
|
---|
| 6229 | { OP_POP_STOP_BT, "pop-stop-bt", ARG_NON },
|
---|
| 6230 | { OP_LOOK_BEHIND, "look-behind", ARG_SPECIAL },
|
---|
| 6231 | { OP_PUSH_LOOK_BEHIND_NOT, "push-look-behind-not", ARG_SPECIAL },
|
---|
| 6232 | { OP_FAIL_LOOK_BEHIND_NOT, "fail-look-behind-not", ARG_NON },
|
---|
| 6233 | { OP_CALL, "call", ARG_ABSADDR },
|
---|
| 6234 | { OP_RETURN, "return", ARG_NON },
|
---|
| 6235 | { OP_CONDITION, "condition", ARG_SPECIAL },
|
---|
| 6236 | { OP_STATE_CHECK_PUSH, "state-check-push", ARG_SPECIAL },
|
---|
| 6237 | { OP_STATE_CHECK_PUSH_OR_JUMP, "state-check-push-or-jump", ARG_SPECIAL },
|
---|
| 6238 | { OP_STATE_CHECK, "state-check", ARG_STATE_CHECK },
|
---|
| 6239 | { OP_STATE_CHECK_ANYCHAR_STAR, "state-check-anychar*", ARG_STATE_CHECK },
|
---|
| 6240 | { OP_STATE_CHECK_ANYCHAR_ML_STAR,
|
---|
| 6241 | "state-check-anychar-ml*", ARG_STATE_CHECK },
|
---|
| 6242 | { -1, "", ARG_NON }
|
---|
| 6243 | };
|
---|
| 6244 |
|
---|
| 6245 | static const char*
|
---|
| 6246 | op2name(int opcode)
|
---|
| 6247 | {
|
---|
| 6248 | int i;
|
---|
| 6249 |
|
---|
| 6250 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
| 6251 | if (opcode == OnigOpInfo[i].opcode)
|
---|
| 6252 | return OnigOpInfo[i].name;
|
---|
| 6253 | }
|
---|
| 6254 | return "";
|
---|
| 6255 | }
|
---|
| 6256 |
|
---|
| 6257 | static int
|
---|
| 6258 | op2arg_type(int opcode)
|
---|
| 6259 | {
|
---|
| 6260 | int i;
|
---|
| 6261 |
|
---|
| 6262 | for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
|
---|
| 6263 | if (opcode == OnigOpInfo[i].opcode)
|
---|
| 6264 | return OnigOpInfo[i].arg_type;
|
---|
| 6265 | }
|
---|
| 6266 | return ARG_SPECIAL;
|
---|
| 6267 | }
|
---|
| 6268 |
|
---|
| 6269 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
/*
 * Write `indent` space characters to stream f.
 * Nothing is written when `indent` is zero or negative.
 */
static void
Indent(FILE* f, int indent)
{
  int remaining;

  for (remaining = indent; remaining > 0; remaining--) {
    putc(' ', f);
  }
}
|
---|
| 6276 | #endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
| 6277 |
|
---|
| 6278 | static void
|
---|
| 6279 | p_string(FILE* f, ptrdiff_t len, UChar* s)
|
---|
| 6280 | {
|
---|
| 6281 | fputs(":", f);
|
---|
| 6282 | while (len-- > 0) { fputc(*s++, f); }
|
---|
| 6283 | }
|
---|
| 6284 |
|
---|
| 6285 | static void
|
---|
| 6286 | p_len_string(FILE* f, LengthType len, int mb_len, UChar* s)
|
---|
| 6287 | {
|
---|
| 6288 | int x = len * mb_len;
|
---|
| 6289 |
|
---|
| 6290 | fprintf(f, ":%d:", len);
|
---|
| 6291 | while (x-- > 0) { fputc(*s++, f); }
|
---|
| 6292 | }
|
---|
| 6293 |
|
---|
| 6294 | extern void
|
---|
| 6295 | onig_print_compiled_byte_code(FILE* f, UChar* bp, UChar** nextp,
|
---|
| 6296 | OnigEncoding enc)
|
---|
| 6297 | {
|
---|
| 6298 | int i, n, arg_type;
|
---|
| 6299 | RelAddrType addr;
|
---|
| 6300 | LengthType len;
|
---|
| 6301 | MemNumType mem;
|
---|
| 6302 | StateCheckNumType scn;
|
---|
| 6303 | OnigCodePoint code;
|
---|
| 6304 | UChar *q;
|
---|
| 6305 |
|
---|
| 6306 | fprintf(f, "[%s", op2name(*bp));
|
---|
| 6307 | arg_type = op2arg_type(*bp);
|
---|
| 6308 | if (arg_type != ARG_SPECIAL) {
|
---|
| 6309 | bp++;
|
---|
| 6310 | switch (arg_type) {
|
---|
| 6311 | case ARG_NON:
|
---|
| 6312 | break;
|
---|
| 6313 | case ARG_RELADDR:
|
---|
| 6314 | GET_RELADDR_INC(addr, bp);
|
---|
| 6315 | fprintf(f, ":(+%d)", addr);
|
---|
| 6316 | break;
|
---|
| 6317 | case ARG_ABSADDR:
|
---|
| 6318 | GET_ABSADDR_INC(addr, bp);
|
---|
| 6319 | fprintf(f, ":(%d)", addr);
|
---|
| 6320 | break;
|
---|
| 6321 | case ARG_LENGTH:
|
---|
| 6322 | GET_LENGTH_INC(len, bp);
|
---|
| 6323 | fprintf(f, ":%d", len);
|
---|
| 6324 | break;
|
---|
| 6325 | case ARG_MEMNUM:
|
---|
| 6326 | mem = *((MemNumType* )bp);
|
---|
| 6327 | bp += SIZE_MEMNUM;
|
---|
| 6328 | fprintf(f, ":%d", mem);
|
---|
| 6329 | break;
|
---|
| 6330 | case ARG_OPTION:
|
---|
| 6331 | {
|
---|
| 6332 | OnigOptionType option = *((OnigOptionType* )bp);
|
---|
| 6333 | bp += SIZE_OPTION;
|
---|
| 6334 | fprintf(f, ":%d", option);
|
---|
| 6335 | }
|
---|
| 6336 | break;
|
---|
| 6337 |
|
---|
| 6338 | case ARG_STATE_CHECK:
|
---|
| 6339 | scn = *((StateCheckNumType* )bp);
|
---|
| 6340 | bp += SIZE_STATE_CHECK_NUM;
|
---|
| 6341 | fprintf(f, ":%d", scn);
|
---|
| 6342 | break;
|
---|
| 6343 | }
|
---|
| 6344 | }
|
---|
| 6345 | else {
|
---|
| 6346 | switch (*bp++) {
|
---|
| 6347 | case OP_EXACT1:
|
---|
| 6348 | case OP_ANYCHAR_STAR_PEEK_NEXT:
|
---|
| 6349 | case OP_ANYCHAR_ML_STAR_PEEK_NEXT:
|
---|
| 6350 | p_string(f, 1, bp++); break;
|
---|
| 6351 | case OP_EXACT2:
|
---|
| 6352 | p_string(f, 2, bp); bp += 2; break;
|
---|
| 6353 | case OP_EXACT3:
|
---|
| 6354 | p_string(f, 3, bp); bp += 3; break;
|
---|
| 6355 | case OP_EXACT4:
|
---|
| 6356 | p_string(f, 4, bp); bp += 4; break;
|
---|
| 6357 | case OP_EXACT5:
|
---|
| 6358 | p_string(f, 5, bp); bp += 5; break;
|
---|
| 6359 | case OP_EXACTN:
|
---|
| 6360 | GET_LENGTH_INC(len, bp);
|
---|
| 6361 | p_len_string(f, len, 1, bp);
|
---|
| 6362 | bp += len;
|
---|
| 6363 | break;
|
---|
| 6364 |
|
---|
| 6365 | case OP_EXACTMB2N1:
|
---|
| 6366 | p_string(f, 2, bp); bp += 2; break;
|
---|
| 6367 | case OP_EXACTMB2N2:
|
---|
| 6368 | p_string(f, 4, bp); bp += 4; break;
|
---|
| 6369 | case OP_EXACTMB2N3:
|
---|
| 6370 | p_string(f, 6, bp); bp += 6; break;
|
---|
| 6371 | case OP_EXACTMB2N:
|
---|
| 6372 | GET_LENGTH_INC(len, bp);
|
---|
| 6373 | p_len_string(f, len, 2, bp);
|
---|
| 6374 | bp += len * 2;
|
---|
| 6375 | break;
|
---|
| 6376 | case OP_EXACTMB3N:
|
---|
| 6377 | GET_LENGTH_INC(len, bp);
|
---|
| 6378 | p_len_string(f, len, 3, bp);
|
---|
| 6379 | bp += len * 3;
|
---|
| 6380 | break;
|
---|
| 6381 | case OP_EXACTMBN:
|
---|
| 6382 | {
|
---|
| 6383 | int mb_len;
|
---|
| 6384 |
|
---|
| 6385 | GET_LENGTH_INC(mb_len, bp);
|
---|
| 6386 | GET_LENGTH_INC(len, bp);
|
---|
| 6387 | fprintf(f, ":%d:%d:", mb_len, len);
|
---|
| 6388 | n = len * mb_len;
|
---|
| 6389 | while (n-- > 0) { fputc(*bp++, f); }
|
---|
| 6390 | }
|
---|
| 6391 | break;
|
---|
| 6392 |
|
---|
| 6393 | case OP_EXACT1_IC:
|
---|
| 6394 | len = enclen(enc, bp);
|
---|
| 6395 | p_string(f, len, bp);
|
---|
| 6396 | bp += len;
|
---|
| 6397 | break;
|
---|
| 6398 | case OP_EXACTN_IC:
|
---|
| 6399 | GET_LENGTH_INC(len, bp);
|
---|
| 6400 | p_len_string(f, len, 1, bp);
|
---|
| 6401 | bp += len;
|
---|
| 6402 | break;
|
---|
| 6403 |
|
---|
| 6404 | case OP_CCLASS:
|
---|
| 6405 | n = bitset_on_num((BitSetRef )bp);
|
---|
| 6406 | bp += SIZE_BITSET;
|
---|
| 6407 | fprintf(f, ":%d", n);
|
---|
| 6408 | break;
|
---|
| 6409 |
|
---|
| 6410 | case OP_CCLASS_NOT:
|
---|
| 6411 | n = bitset_on_num((BitSetRef )bp);
|
---|
| 6412 | bp += SIZE_BITSET;
|
---|
| 6413 | fprintf(f, ":%d", n);
|
---|
| 6414 | break;
|
---|
| 6415 |
|
---|
| 6416 | case OP_CCLASS_MB:
|
---|
| 6417 | case OP_CCLASS_MB_NOT:
|
---|
| 6418 | GET_LENGTH_INC(len, bp);
|
---|
| 6419 | q = bp;
|
---|
| 6420 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 6421 | ALIGNMENT_RIGHT(q);
|
---|
| 6422 | #endif
|
---|
| 6423 | GET_CODE_POINT(code, q);
|
---|
| 6424 | bp += len;
|
---|
| 6425 | fprintf(f, ":%d:%d", (int )code, len);
|
---|
| 6426 | break;
|
---|
| 6427 |
|
---|
| 6428 | case OP_CCLASS_MIX:
|
---|
| 6429 | case OP_CCLASS_MIX_NOT:
|
---|
| 6430 | n = bitset_on_num((BitSetRef )bp);
|
---|
| 6431 | bp += SIZE_BITSET;
|
---|
| 6432 | GET_LENGTH_INC(len, bp);
|
---|
| 6433 | q = bp;
|
---|
| 6434 | #ifndef PLATFORM_UNALIGNED_WORD_ACCESS
|
---|
| 6435 | ALIGNMENT_RIGHT(q);
|
---|
| 6436 | #endif
|
---|
| 6437 | GET_CODE_POINT(code, q);
|
---|
| 6438 | bp += len;
|
---|
| 6439 | fprintf(f, ":%d:%d:%d", n, (int )code, len);
|
---|
| 6440 | break;
|
---|
| 6441 |
|
---|
| 6442 | case OP_CCLASS_NODE:
|
---|
| 6443 | {
|
---|
| 6444 | CClassNode *cc;
|
---|
| 6445 |
|
---|
| 6446 | GET_POINTER_INC(cc, bp);
|
---|
| 6447 | n = bitset_on_num(cc->bs);
|
---|
| 6448 | fprintf(f, ":%"PRIuPTR":%d", (uintptr_t )cc, n);
|
---|
| 6449 | }
|
---|
| 6450 | break;
|
---|
| 6451 |
|
---|
| 6452 | case OP_BACKREFN_IC:
|
---|
| 6453 | mem = *((MemNumType* )bp);
|
---|
| 6454 | bp += SIZE_MEMNUM;
|
---|
| 6455 | fprintf(f, ":%d", mem);
|
---|
| 6456 | break;
|
---|
| 6457 |
|
---|
| 6458 | case OP_BACKREF_MULTI_IC:
|
---|
| 6459 | case OP_BACKREF_MULTI:
|
---|
| 6460 | fputs(" ", f);
|
---|
| 6461 | GET_LENGTH_INC(len, bp);
|
---|
| 6462 | for (i = 0; i < len; i++) {
|
---|
| 6463 | GET_MEMNUM_INC(mem, bp);
|
---|
| 6464 | if (i > 0) fputs(", ", f);
|
---|
| 6465 | fprintf(f, "%d", mem);
|
---|
| 6466 | }
|
---|
| 6467 | break;
|
---|
| 6468 |
|
---|
| 6469 | case OP_BACKREF_WITH_LEVEL:
|
---|
| 6470 | {
|
---|
| 6471 | OnigOptionType option;
|
---|
| 6472 | LengthType level;
|
---|
| 6473 |
|
---|
| 6474 | GET_OPTION_INC(option, bp);
|
---|
| 6475 | fprintf(f, ":%d", option);
|
---|
| 6476 | GET_LENGTH_INC(level, bp);
|
---|
| 6477 | fprintf(f, ":%d", level);
|
---|
| 6478 |
|
---|
| 6479 | fputs(" ", f);
|
---|
| 6480 | GET_LENGTH_INC(len, bp);
|
---|
| 6481 | for (i = 0; i < len; i++) {
|
---|
| 6482 | GET_MEMNUM_INC(mem, bp);
|
---|
| 6483 | if (i > 0) fputs(", ", f);
|
---|
| 6484 | fprintf(f, "%d", mem);
|
---|
| 6485 | }
|
---|
| 6486 | }
|
---|
| 6487 | break;
|
---|
| 6488 |
|
---|
| 6489 | case OP_REPEAT:
|
---|
| 6490 | case OP_REPEAT_NG:
|
---|
| 6491 | {
|
---|
| 6492 | mem = *((MemNumType* )bp);
|
---|
| 6493 | bp += SIZE_MEMNUM;
|
---|
| 6494 | addr = *((RelAddrType* )bp);
|
---|
| 6495 | bp += SIZE_RELADDR;
|
---|
| 6496 | fprintf(f, ":%d:%d", mem, addr);
|
---|
| 6497 | }
|
---|
| 6498 | break;
|
---|
| 6499 |
|
---|
| 6500 | case OP_PUSH_OR_JUMP_EXACT1:
|
---|
| 6501 | case OP_PUSH_IF_PEEK_NEXT:
|
---|
| 6502 | addr = *((RelAddrType* )bp);
|
---|
| 6503 | bp += SIZE_RELADDR;
|
---|
| 6504 | fprintf(f, ":(%d)", addr);
|
---|
| 6505 | p_string(f, 1, bp);
|
---|
| 6506 | bp += 1;
|
---|
| 6507 | break;
|
---|
| 6508 |
|
---|
| 6509 | case OP_LOOK_BEHIND:
|
---|
| 6510 | GET_LENGTH_INC(len, bp);
|
---|
| 6511 | fprintf(f, ":%d", len);
|
---|
| 6512 | break;
|
---|
| 6513 |
|
---|
| 6514 | case OP_PUSH_LOOK_BEHIND_NOT:
|
---|
| 6515 | GET_RELADDR_INC(addr, bp);
|
---|
| 6516 | GET_LENGTH_INC(len, bp);
|
---|
| 6517 | fprintf(f, ":%d:(%d)", len, addr);
|
---|
| 6518 | break;
|
---|
| 6519 |
|
---|
| 6520 | case OP_STATE_CHECK_PUSH:
|
---|
| 6521 | case OP_STATE_CHECK_PUSH_OR_JUMP:
|
---|
| 6522 | scn = *((StateCheckNumType* )bp);
|
---|
| 6523 | bp += SIZE_STATE_CHECK_NUM;
|
---|
| 6524 | addr = *((RelAddrType* )bp);
|
---|
| 6525 | bp += SIZE_RELADDR;
|
---|
| 6526 | fprintf(f, ":%d:(%d)", scn, addr);
|
---|
| 6527 | break;
|
---|
| 6528 |
|
---|
| 6529 | case OP_CONDITION:
|
---|
| 6530 | GET_MEMNUM_INC(mem, bp);
|
---|
| 6531 | GET_RELADDR_INC(addr, bp);
|
---|
| 6532 | fprintf(f, ":%d:(%d)", mem, addr);
|
---|
| 6533 | break;
|
---|
| 6534 |
|
---|
| 6535 | default:
|
---|
| 6536 | fprintf(stderr, "onig_print_compiled_byte_code: undefined code %d\n",
|
---|
| 6537 | *--bp);
|
---|
| 6538 | }
|
---|
| 6539 | }
|
---|
| 6540 | fputs("]", f);
|
---|
| 6541 | if (nextp) *nextp = bp;
|
---|
| 6542 | }
|
---|
| 6543 |
|
---|
| 6544 | #ifdef ONIG_DEBUG_COMPILE
|
---|
| 6545 | static void
|
---|
| 6546 | print_compiled_byte_code_list(FILE* f, regex_t* reg)
|
---|
| 6547 | {
|
---|
| 6548 | int ncode;
|
---|
| 6549 | UChar* bp = reg->p;
|
---|
| 6550 | UChar* end = reg->p + reg->used;
|
---|
| 6551 |
|
---|
| 6552 | fprintf(f, "code length: %d", reg->used);
|
---|
| 6553 |
|
---|
| 6554 | ncode = -1;
|
---|
| 6555 | while (bp < end) {
|
---|
| 6556 | ncode++;
|
---|
| 6557 | if (ncode % 5 == 0)
|
---|
| 6558 | fprintf(f, "\n%ld:", bp - reg->p);
|
---|
| 6559 | else
|
---|
| 6560 | fprintf(f, " %ld:", bp - reg->p);
|
---|
| 6561 | onig_print_compiled_byte_code(f, bp, &bp, reg->enc);
|
---|
| 6562 | }
|
---|
| 6563 |
|
---|
| 6564 | fprintf(f, "\n");
|
---|
| 6565 | }
|
---|
| 6566 | #endif /* ONIG_DEBUG_COMPILE */
|
---|
| 6567 |
|
---|
| 6568 | #ifdef ONIG_DEBUG_PARSE_TREE
|
---|
| 6569 | static void
|
---|
| 6570 | print_indent_tree(FILE* f, Node* node, int indent)
|
---|
| 6571 | {
|
---|
| 6572 | int i, type, container_p = 0;
|
---|
| 6573 | int add = 3;
|
---|
| 6574 | UChar* p;
|
---|
| 6575 |
|
---|
| 6576 | Indent(f, indent);
|
---|
| 6577 | if (IS_NULL(node)) {
|
---|
| 6578 | fprintf(f, "ERROR: null node!!!\n");
|
---|
| 6579 | exit (0);
|
---|
| 6580 | }
|
---|
| 6581 |
|
---|
| 6582 | type = NTYPE(node);
|
---|
| 6583 | switch (type) {
|
---|
| 6584 | case NT_LIST:
|
---|
| 6585 | case NT_ALT:
|
---|
| 6586 | if (NTYPE(node) == NT_LIST)
|
---|
| 6587 | fprintf(f, "<list:%"PRIxPTR">\n", (intptr_t )node);
|
---|
| 6588 | else
|
---|
| 6589 | fprintf(f, "<alt:%"PRIxPTR">\n", (intptr_t )node);
|
---|
| 6590 |
|
---|
| 6591 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
| 6592 | while (IS_NOT_NULL(node = NCDR(node))) {
|
---|
| 6593 | if (NTYPE(node) != type) {
|
---|
| 6594 | fprintf(f, "ERROR: list/alt right is not a cons. %d\n", NTYPE(node));
|
---|
| 6595 | exit(0);
|
---|
| 6596 | }
|
---|
| 6597 | print_indent_tree(f, NCAR(node), indent + add);
|
---|
| 6598 | }
|
---|
| 6599 | break;
|
---|
| 6600 |
|
---|
| 6601 | case NT_STR:
|
---|
| 6602 | fprintf(f, "<string%s:%"PRIxPTR">",
|
---|
| 6603 | (NSTRING_IS_RAW(node) ? "-raw" : ""), (intptr_t )node);
|
---|
| 6604 | for (p = NSTR(node)->s; p < NSTR(node)->end; p++) {
|
---|
| 6605 | if (*p >= 0x20 && *p < 0x7f)
|
---|
| 6606 | fputc(*p, f);
|
---|
| 6607 | else {
|
---|
| 6608 | fprintf(f, " 0x%02x", *p);
|
---|
| 6609 | }
|
---|
| 6610 | }
|
---|
| 6611 | break;
|
---|
| 6612 |
|
---|
| 6613 | case NT_CCLASS:
|
---|
| 6614 | fprintf(f, "<cclass:%"PRIxPTR">", (intptr_t )node);
|
---|
| 6615 | if (IS_NCCLASS_NOT(NCCLASS(node))) fputs(" not", f);
|
---|
| 6616 | if (NCCLASS(node)->mbuf) {
|
---|
| 6617 | BBuf* bbuf = NCCLASS(node)->mbuf;
|
---|
| 6618 | for (i = 0; i < (int )bbuf->used; i++) {
|
---|
| 6619 | if (i > 0) fprintf(f, ",");
|
---|
| 6620 | fprintf(f, "%0x", bbuf->p[i]);
|
---|
| 6621 | }
|
---|
| 6622 | }
|
---|
| 6623 | break;
|
---|
| 6624 |
|
---|
| 6625 | case NT_CTYPE:
|
---|
| 6626 | fprintf(f, "<ctype:%"PRIxPTR"> ", (intptr_t )node);
|
---|
| 6627 | switch (NCTYPE(node)->ctype) {
|
---|
| 6628 | case ONIGENC_CTYPE_WORD:
|
---|
| 6629 | if (NCTYPE(node)->not != 0)
|
---|
| 6630 | fputs("not word", f);
|
---|
| 6631 | else
|
---|
| 6632 | fputs("word", f);
|
---|
| 6633 | break;
|
---|
| 6634 |
|
---|
| 6635 | default:
|
---|
| 6636 | fprintf(f, "ERROR: undefined ctype.\n");
|
---|
| 6637 | exit(0);
|
---|
| 6638 | }
|
---|
| 6639 | break;
|
---|
| 6640 |
|
---|
| 6641 | case NT_CANY:
|
---|
| 6642 | fprintf(f, "<anychar:%"PRIxPTR">", (intptr_t )node);
|
---|
| 6643 | break;
|
---|
| 6644 |
|
---|
| 6645 | case NT_ANCHOR:
|
---|
| 6646 | fprintf(f, "<anchor:%"PRIxPTR"> ", (intptr_t )node);
|
---|
| 6647 | switch (NANCHOR(node)->type) {
|
---|
| 6648 | case ANCHOR_BEGIN_BUF: fputs("begin buf", f); break;
|
---|
| 6649 | case ANCHOR_END_BUF: fputs("end buf", f); break;
|
---|
| 6650 | case ANCHOR_BEGIN_LINE: fputs("begin line", f); break;
|
---|
| 6651 | case ANCHOR_END_LINE: fputs("end line", f); break;
|
---|
| 6652 | case ANCHOR_SEMI_END_BUF: fputs("semi end buf", f); break;
|
---|
| 6653 | case ANCHOR_BEGIN_POSITION: fputs("begin position", f); break;
|
---|
| 6654 | case ANCHOR_ANYCHAR_STAR: fputs("begin position/line", f); break;
|
---|
| 6655 |
|
---|
| 6656 | case ANCHOR_WORD_BOUND: fputs("word bound", f); break;
|
---|
| 6657 | case ANCHOR_NOT_WORD_BOUND: fputs("not word bound", f); break;
|
---|
| 6658 | #ifdef USE_WORD_BEGIN_END
|
---|
| 6659 | case ANCHOR_WORD_BEGIN: fputs("word begin", f); break;
|
---|
| 6660 | case ANCHOR_WORD_END: fputs("word end", f); break;
|
---|
| 6661 | #endif
|
---|
| 6662 | case ANCHOR_PREC_READ: fputs("prec read", f); container_p = TRUE; break;
|
---|
| 6663 | case ANCHOR_PREC_READ_NOT: fputs("prec read not", f); container_p = TRUE; break;
|
---|
| 6664 | case ANCHOR_LOOK_BEHIND: fputs("look_behind", f); container_p = TRUE; break;
|
---|
| 6665 | case ANCHOR_LOOK_BEHIND_NOT: fputs("look_behind_not",f); container_p = TRUE; break;
|
---|
| 6666 | case ANCHOR_KEEP: fputs("keep",f); break;
|
---|
| 6667 |
|
---|
| 6668 | default:
|
---|
| 6669 | fprintf(f, "ERROR: undefined anchor type.\n");
|
---|
| 6670 | break;
|
---|
| 6671 | }
|
---|
| 6672 | break;
|
---|
| 6673 |
|
---|
| 6674 | case NT_BREF:
|
---|
| 6675 | {
|
---|
| 6676 | int* p;
|
---|
| 6677 | BRefNode* br = NBREF(node);
|
---|
| 6678 | p = BACKREFS_P(br);
|
---|
| 6679 | fprintf(f, "<backref:%"PRIxPTR">", (intptr_t )node);
|
---|
| 6680 | for (i = 0; i < br->back_num; i++) {
|
---|
| 6681 | if (i > 0) fputs(", ", f);
|
---|
| 6682 | fprintf(f, "%d", p[i]);
|
---|
| 6683 | }
|
---|
| 6684 | }
|
---|
| 6685 | break;
|
---|
| 6686 |
|
---|
| 6687 | #ifdef USE_SUBEXP_CALL
|
---|
| 6688 | case NT_CALL:
|
---|
| 6689 | {
|
---|
| 6690 | CallNode* cn = NCALL(node);
|
---|
| 6691 | fprintf(f, "<call:%"PRIxPTR">", (intptr_t )node);
|
---|
| 6692 | p_string(f, cn->name_end - cn->name, cn->name);
|
---|
| 6693 | }
|
---|
| 6694 | break;
|
---|
| 6695 | #endif
|
---|
| 6696 |
|
---|
| 6697 | case NT_QTFR:
|
---|
| 6698 | fprintf(f, "<quantifier:%"PRIxPTR">{%d,%d}%s\n", (intptr_t )node,
|
---|
| 6699 | NQTFR(node)->lower, NQTFR(node)->upper,
|
---|
| 6700 | (NQTFR(node)->greedy ? "" : "?"));
|
---|
| 6701 | print_indent_tree(f, NQTFR(node)->target, indent + add);
|
---|
| 6702 | break;
|
---|
| 6703 |
|
---|
| 6704 | case NT_ENCLOSE:
|
---|
| 6705 | fprintf(f, "<enclose:%"PRIxPTR"> ", (intptr_t )node);
|
---|
| 6706 | switch (NENCLOSE(node)->type) {
|
---|
| 6707 | case ENCLOSE_OPTION:
|
---|
| 6708 | fprintf(f, "option:%d", NENCLOSE(node)->option);
|
---|
| 6709 | break;
|
---|
| 6710 | case ENCLOSE_MEMORY:
|
---|
| 6711 | fprintf(f, "memory:%d", NENCLOSE(node)->regnum);
|
---|
| 6712 | break;
|
---|
| 6713 | case ENCLOSE_STOP_BACKTRACK:
|
---|
| 6714 | fprintf(f, "stop-bt");
|
---|
| 6715 | break;
|
---|
| 6716 | case ENCLOSE_CONDITION:
|
---|
| 6717 | fprintf(f, "condition:%d", NENCLOSE(node)->regnum);
|
---|
| 6718 | break;
|
---|
| 6719 |
|
---|
| 6720 | default:
|
---|
| 6721 | break;
|
---|
| 6722 | }
|
---|
| 6723 | fprintf(f, "\n");
|
---|
| 6724 | print_indent_tree(f, NENCLOSE(node)->target, indent + add);
|
---|
| 6725 | break;
|
---|
| 6726 |
|
---|
| 6727 | default:
|
---|
| 6728 | fprintf(f, "print_indent_tree: undefined node type %d\n", NTYPE(node));
|
---|
| 6729 | break;
|
---|
| 6730 | }
|
---|
| 6731 |
|
---|
| 6732 | if (type != NT_LIST && type != NT_ALT && type != NT_QTFR &&
|
---|
| 6733 | type != NT_ENCLOSE)
|
---|
| 6734 | fprintf(f, "\n");
|
---|
| 6735 |
|
---|
| 6736 | if (container_p) print_indent_tree(f, NANCHOR(node)->target, indent + add);
|
---|
| 6737 |
|
---|
| 6738 | fflush(f);
|
---|
| 6739 | }
|
---|
| 6740 |
|
---|
| 6741 | static void
|
---|
| 6742 | print_tree(FILE* f, Node* node)
|
---|
| 6743 | {
|
---|
| 6744 | print_indent_tree(f, node, 0);
|
---|
| 6745 | }
|
---|
| 6746 | #endif /* ONIG_DEBUG_PARSE_TREE */
|
---|
| 6747 | #endif /* ONIG_DEBUG */
|
---|