source: EcnlProtoTool/trunk/onigmo-6.1.3/src/regexec.c@ 331

Last change on this file since 331 was 331, checked in by coas-nagasima, 6 years ago

prototoolに関連するプロジェクトをnewlibからmuslを使うよう変更・更新
ntshellをnewlibの下位の実装から、muslのsyscallの実装に変更・更新
以下のOSSをアップデート
・mruby-1.3.0
・musl-1.1.18
・onigmo-6.1.3
・tcc-0.9.27
以下のOSSを追加
・openssl-1.1.0e
・curl-7.57.0
・zlib-1.2.11
以下のmrbgemsを追加
・iij/mruby-digest
・iij/mruby-env
・iij/mruby-errno
・iij/mruby-iijson
・iij/mruby-ipaddr
・iij/mruby-mock
・iij/mruby-require
・iij/mruby-tls-openssl

  • Property svn:eol-style set to native
  • Property svn:mime-type set to text/x-csrc;charset=UTF-8
File size: 115.5 KB
Line 
1/**********************************************************************
2 regexec.c - Onigmo (Oniguruma-mod) (regular expression library)
3**********************************************************************/
4/*-
5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp>
6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp>
7 * All rights reserved.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 */
30
31#include "regint.h"
32
/* Ruby builds switch USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE off;
 * every other build switches it on.  The DATA_ENSURE* macros further down
 * pick right_range or end as their bound depending on this setting. */
#if defined(RUBY)
# undef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#else
# define USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
#endif

/* Unless the build already chose a value, default USE_TOKEN_THREADED_VM
 * to 1 when __GNUC__ is defined and to 0 otherwise. */
#if !defined(USE_TOKEN_THREADED_VM)
# if defined(__GNUC__)
#  define USE_TOKEN_THREADED_VM 1
# else
#  define USE_TOKEN_THREADED_VM 0
# endif
#endif
46
#ifdef RUBY
/* Bit tested in ruby_encoding_index by rb_enc_asciicompat()
 * (presumably marks a "dummy" encoding -- confirm against Ruby's encoding.h). */
# define ENC_DUMMY_FLAG (1<<24)

/* Returns non-zero when enc has a minimum character length of 1 and its
 * ruby_encoding_index does not carry ENC_DUMMY_FLAG. */
static inline int
rb_enc_asciicompat(OnigEncoding enc)
{
  return ONIGENC_MBC_MINLEN(enc)==1 && !((enc)->ruby_encoding_index & ENC_DUMMY_FLAG);
}

/* Ruby-specific replacement for ONIGENC_IS_MBC_ASCII_WORD: for encodings
 * accepted by rb_enc_asciicompat() the first byte is tested directly,
 * otherwise the character is decoded and checked with
 * onigenc_ascii_is_code_ctype().  The pointer argument is parenthesised
 * before it is dereferenced so that expression arguments such as `p + 1`
 * expand correctly. */
# undef ONIGENC_IS_MBC_ASCII_WORD
# define ONIGENC_IS_MBC_ASCII_WORD(enc,s,end) \
    (rb_enc_asciicompat(enc) ? (ISALNUM(*(s)) || *(s)=='_') : \
   onigenc_ascii_is_code_ctype( \
        ONIGENC_MBC_TO_CODE(enc,s,end),ONIGENC_CTYPE_WORD,enc))
#endif /* RUBY */
60
#ifdef USE_CRNL_AS_LINE_TERMINATOR
/* True when the character at p decodes to 13 (CR) and the character that
 * follows it decodes to 10 (LF). */
# define ONIGENC_IS_MBC_CRNL(enc,p,end) \
  (ONIGENC_MBC_TO_CODE(enc,p,end) == 13 && \
   ONIGENC_MBC_TO_CODE(enc,(p+enclen(enc,p,end)),end) == 10)
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  is_mbc_newline_ex((enc),(p),(start),(end),(option),(check_prev))

/*
 * Newline test that understands CR LF pairs.
 *
 * Without the CRLF newline option this is just ONIGENC_IS_MBC_NEWLINE().
 * With it:
 *   - an LF at p counts as a newline, except that when check_prev is set
 *     and the preceding character is CR the result is 0;
 *   - a CR at p directly followed by an LF located before `end` counts;
 *   - anything else falls back to ONIGENC_IS_MBC_NEWLINE().
 */
static int
is_mbc_newline_ex(OnigEncoding enc, const UChar *p, const UChar *start,
                  const UChar *end, OnigOptionType option, int check_prev)
{
  const UChar *next;

  if (! IS_NEWLINE_CRLF(option))
    return ONIGENC_IS_MBC_NEWLINE(enc, p, end);

  if (ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0a) {
    const UChar *before;

    if (! check_prev)
      return 1;

    before = onigenc_get_prev_char_head(enc, start, p, end);
    return ((before != NULL) &&
            ONIGENC_MBC_TO_CODE(enc, before, end) == 0x0d) ? 0 : 1;
  }

  next = p + enclen(enc, p, end);
  if (next < end &&
      ONIGENC_MBC_TO_CODE(enc, p, end) == 0x0d &&
      ONIGENC_MBC_TO_CODE(enc, next, end) == 0x0a)
    return 1;

  return ONIGENC_IS_MBC_NEWLINE(enc, p, end) ? 1 : 0;
}
#else /* USE_CRNL_AS_LINE_TERMINATOR */
/* No CRLF support: start, option and check_prev are ignored. */
# define ONIGENC_IS_MBC_NEWLINE_EX(enc,p,start,end,option,check_prev) \
  ONIGENC_IS_MBC_NEWLINE((enc), (p), (end))
#endif /* USE_CRNL_AS_LINE_TERMINATOR */
102
103#ifdef USE_CAPTURE_HISTORY
104static void history_tree_free(OnigCaptureTreeNode* node);
105
106static void
107history_tree_clear(OnigCaptureTreeNode* node)
108{
109 int i;
110
111 if (IS_NOT_NULL(node)) {
112 for (i = 0; i < node->num_childs; i++) {
113 if (IS_NOT_NULL(node->childs[i])) {
114 history_tree_free(node->childs[i]);
115 }
116 }
117 for (i = 0; i < node->allocated; i++) {
118 node->childs[i] = (OnigCaptureTreeNode* )0;
119 }
120 node->num_childs = 0;
121 node->beg = ONIG_REGION_NOTPOS;
122 node->end = ONIG_REGION_NOTPOS;
123 node->group = -1;
124 xfree(node->childs);
125 node->childs = (OnigCaptureTreeNode** )0;
126 }
127}
128
129static void
130history_tree_free(OnigCaptureTreeNode* node)
131{
132 history_tree_clear(node);
133 xfree(node);
134}
135
136static void
137history_root_free(OnigRegion* r)
138{
139 if (IS_NOT_NULL(r->history_root)) {
140 history_tree_free(r->history_root);
141 r->history_root = (OnigCaptureTreeNode* )0;
142 }
143}
144
145static OnigCaptureTreeNode*
146history_node_new(void)
147{
148 OnigCaptureTreeNode* node;
149
150 node = (OnigCaptureTreeNode* )xmalloc(sizeof(OnigCaptureTreeNode));
151 CHECK_NULL_RETURN(node);
152 node->childs = (OnigCaptureTreeNode** )0;
153 node->allocated = 0;
154 node->num_childs = 0;
155 node->group = -1;
156 node->beg = ONIG_REGION_NOTPOS;
157 node->end = ONIG_REGION_NOTPOS;
158
159 return node;
160}
161
162static int
163history_tree_add_child(OnigCaptureTreeNode* parent, OnigCaptureTreeNode* child)
164{
165# define HISTORY_TREE_INIT_ALLOC_SIZE 8
166
167 if (parent->num_childs >= parent->allocated) {
168 int n, i;
169
170 if (IS_NULL(parent->childs)) {
171 n = HISTORY_TREE_INIT_ALLOC_SIZE;
172 parent->childs =
173 (OnigCaptureTreeNode** )xmalloc(sizeof(OnigCaptureTreeNode*) * n);
174 CHECK_NULL_RETURN_MEMERR(parent->childs);
175 }
176 else {
177 OnigCaptureTreeNode** tmp;
178 n = parent->allocated * 2;
179 tmp =
180 (OnigCaptureTreeNode** )xrealloc(parent->childs,
181 sizeof(OnigCaptureTreeNode*) * n);
182 if (tmp == 0) {
183 history_tree_clear(parent);
184 return ONIGERR_MEMORY;
185 }
186 parent->childs = tmp;
187 }
188 for (i = parent->allocated; i < n; i++) {
189 parent->childs[i] = (OnigCaptureTreeNode* )0;
190 }
191 parent->allocated = n;
192 }
193
194 parent->childs[parent->num_childs] = child;
195 parent->num_childs++;
196 return 0;
197}
198
199static OnigCaptureTreeNode*
200history_tree_clone(OnigCaptureTreeNode* node)
201{
202 int i, r;
203 OnigCaptureTreeNode *clone, *child;
204
205 clone = history_node_new();
206 CHECK_NULL_RETURN(clone);
207
208 clone->beg = node->beg;
209 clone->end = node->end;
210 for (i = 0; i < node->num_childs; i++) {
211 child = history_tree_clone(node->childs[i]);
212 if (IS_NULL(child)) {
213 history_tree_free(clone);
214 return (OnigCaptureTreeNode* )0;
215 }
216 r = history_tree_add_child(clone, child);
217 if (r != 0) {
218 history_tree_free(child);
219 history_tree_free(clone);
220 return (OnigCaptureTreeNode* )0;
221 }
222 }
223
224 return clone;
225}
226
227extern OnigCaptureTreeNode*
228onig_get_capture_tree(OnigRegion* region)
229{
230 return region->history_root;
231}
232#endif /* USE_CAPTURE_HISTORY */
233
234extern void
235onig_region_clear(OnigRegion* region)
236{
237 int i;
238
239 for (i = 0; i < region->num_regs; i++) {
240 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
241 }
242#ifdef USE_CAPTURE_HISTORY
243 history_root_free(region);
244#endif
245}
246
247extern int
248onig_region_resize(OnigRegion* region, int n)
249{
250 region->num_regs = n;
251
252 if (n < ONIG_NREGION)
253 n = ONIG_NREGION;
254
255 if (region->allocated == 0) {
256 region->beg = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
257 if (region->beg == 0)
258 return ONIGERR_MEMORY;
259
260 region->end = (OnigPosition* )xmalloc(n * sizeof(OnigPosition));
261 if (region->end == 0) {
262 xfree(region->beg);
263 return ONIGERR_MEMORY;
264 }
265
266 region->allocated = n;
267 }
268 else if (region->allocated < n) {
269 OnigPosition *tmp;
270
271 region->allocated = 0;
272 tmp = (OnigPosition* )xrealloc(region->beg, n * sizeof(OnigPosition));
273 if (tmp == 0) {
274 xfree(region->beg);
275 xfree(region->end);
276 return ONIGERR_MEMORY;
277 }
278 region->beg = tmp;
279 tmp = (OnigPosition* )xrealloc(region->end, n * sizeof(OnigPosition));
280 if (tmp == 0) {
281 xfree(region->beg);
282 xfree(region->end);
283 return ONIGERR_MEMORY;
284 }
285 region->end = tmp;
286
287 region->allocated = n;
288 }
289
290 return 0;
291}
292
293static int
294onig_region_resize_clear(OnigRegion* region, int n)
295{
296 int r;
297
298 r = onig_region_resize(region, n);
299 if (r != 0) return r;
300 onig_region_clear(region);
301 return 0;
302}
303
304extern int
305onig_region_set(OnigRegion* region, int at, int beg, int end)
306{
307 if (at < 0) return ONIGERR_INVALID_ARGUMENT;
308
309 if (at >= region->allocated) {
310 int r = onig_region_resize(region, at + 1);
311 if (r < 0) return r;
312 }
313
314 region->beg[at] = beg;
315 region->end[at] = end;
316 return 0;
317}
318
319extern void
320onig_region_init(OnigRegion* region)
321{
322 region->num_regs = 0;
323 region->allocated = 0;
324 region->beg = (OnigPosition* )0;
325 region->end = (OnigPosition* )0;
326 region->history_root = (OnigCaptureTreeNode* )0;
327}
328
329extern OnigRegion*
330onig_region_new(void)
331{
332 OnigRegion* r;
333
334 r = (OnigRegion* )xmalloc(sizeof(OnigRegion));
335 if (r)
336 onig_region_init(r);
337 return r;
338}
339
340extern void
341onig_region_free(OnigRegion* r, int free_self)
342{
343 if (r) {
344 if (r->allocated > 0) {
345 if (r->beg) xfree(r->beg);
346 if (r->end) xfree(r->end);
347 r->allocated = 0;
348 }
349#ifdef USE_CAPTURE_HISTORY
350 history_root_free(r);
351#endif
352 if (free_self) xfree(r);
353 }
354}
355
356extern void
357onig_region_copy(OnigRegion* to, const OnigRegion* from)
358{
359#define RREGC_SIZE (sizeof(int) * from->num_regs)
360 int i, r;
361
362 if (to == from) return;
363
364 r = onig_region_resize(to, from->num_regs);
365 if (r) return;
366
367 for (i = 0; i < from->num_regs; i++) {
368 to->beg[i] = from->beg[i];
369 to->end[i] = from->end[i];
370 }
371 to->num_regs = from->num_regs;
372
373#ifdef USE_CAPTURE_HISTORY
374 history_root_free(to);
375
376 if (IS_NOT_NULL(from->history_root)) {
377 to->history_root = history_tree_clone(from->history_root);
378 }
379#endif
380}
381
382
/** stack **/
#define INVALID_STACK_INDEX       -1

/*
 * Stack entry types.
 *
 * Types with a non-zero low byte are the ones STACK_POP stops at
 * (see STK_MASK_POP_USED).  Bit 0x8000 is shared by STK_MEM_END and
 * STK_MEM_END_MARK (see STK_MASK_MEM_END_OR_MARK).
 */

/* used by normal-POP */
#define STK_ALT                   0x0001
#define STK_LOOK_BEHIND_NOT       0x0002
#define STK_POS_NOT               0x0003

/* handled by normal-POP */
#define STK_MEM_START             0x0100
#define STK_MEM_END               0x8200
#define STK_REPEAT_INC            0x0300
#define STK_STATE_CHECK_MARK      0x1000

/* avoided by normal-POP */
#define STK_NULL_CHECK_START      0x3000
#define STK_NULL_CHECK_END        0x5000  /* for recursive call */
#define STK_MEM_END_MARK          0x8400
#define STK_POS                   0x0500  /* used when POP-POS */
#define STK_STOP_BT               0x0600  /* mark for "(?>...)" */
#define STK_REPEAT                0x0700
#define STK_CALL_FRAME            0x0800
#define STK_RETURN                0x0900
#define STK_VOID                  0x0a00  /* for fill a blank */
#define STK_ABSENT_POS            0x0b00  /* for absent */
#define STK_ABSENT                0x0c00  /* absent inner loop marker */

/* stack type check mask */
#define STK_MASK_POP_USED         0x00ff
#define STK_MASK_TO_VOID_TARGET   0x10ff
#define STK_MASK_MEM_END_OR_MARK  0x8000  /* MEM_END or MEM_END_MARK */
413
/*
 * MATCH_ARG_INIT fills in a match-argument object before a match or search:
 * no saved stack yet, plus the given options, region, start and gpos.
 * When USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE is defined, best_len is also
 * reset to ONIG_MISMATCH.
 */
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
  (msa).best_len = ONIG_MISMATCH;\
} while(0)
#else
# define MATCH_ARG_INIT(msa, arg_option, arg_region, arg_start, arg_gpos) do {\
  (msa).stack_p  = (void* )0;\
  (msa).options  = (arg_option);\
  (msa).region   = (arg_region);\
  (msa).start    = (arg_start);\
  (msa).gpos     = (arg_gpos);\
} while(0)
#endif
432
#ifdef USE_COMBINATION_EXPLOSION_CHECK

/* Check buffers of at least this many bytes come from xmalloc();
 * smaller ones come from xalloca(). */
# define STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE  16

/*
 * Sets up (msa).state_check_buff, a bitmap with one bit per
 * (string position, check state) pair.
 *
 * A buffer is only created when state_num > 0, the string is at least
 * STATE_CHECK_STRING_THRESHOLD_LEN long and the computed size is below
 * STATE_CHECK_BUFF_MAX_SIZE; otherwise the buffer pointer and size are
 * set to NULL/0.  `offset` is converted in place from a string offset to a
 * byte offset in the bitmap, and only the bytes from that offset on are
 * zeroed.
 *
 * On xmalloc() failure this macro returns ONIGERR_MEMORY from the
 * enclosing function (via CHECK_NULL_RETURN_MEMERR).
 */
# define STATE_CHECK_BUFF_INIT(msa, str_len, offset, state_num) do {\
  if ((state_num) > 0 && (str_len) >= STATE_CHECK_STRING_THRESHOLD_LEN) {\
    unsigned int size = (unsigned int )(((str_len) + 1) * (state_num) + 7) >> 3;\
    (offset) = ((offset) * (state_num)) >> 3;\
    if (size > 0 && (offset) < size && size < STATE_CHECK_BUFF_MAX_SIZE) {\
      if (size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) {\
        (msa).state_check_buff = (void* )xmalloc(size);\
        CHECK_NULL_RETURN_MEMERR((msa).state_check_buff);\
      }\
      else \
        (msa).state_check_buff = (void* )xalloca(size);\
      xmemset(((char* )((msa).state_check_buff)+(offset)), 0, \
              (size_t )(size - (offset))); \
      (msa).state_check_buff_size = size;\
    }\
    else {\
      (msa).state_check_buff = (void* )0;\
      (msa).state_check_buff_size = 0;\
    }\
  }\
  else {\
    (msa).state_check_buff = (void* )0;\
    (msa).state_check_buff_size = 0;\
  }\
} while(0)

/* Frees the saved match stack and, when it was heap allocated (size at or
 * above the malloc threshold), the state check buffer. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
  if ((msa).state_check_buff_size >= STATE_CHECK_BUFF_MALLOC_THRESHOLD_SIZE) { \
    if ((msa).state_check_buff) xfree((msa).state_check_buff);\
  }\
} while(0)
#else /* USE_COMBINATION_EXPLOSION_CHECK */
/* Frees the saved match stack.  Wrapped in do/while(0), like the variant
 * above, so that it behaves as a single statement next to if/else. */
# define MATCH_ARG_FREE(msa) do {\
  if ((msa).stack_p) xfree((msa).stack_p);\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
472
473
474
/* Largest number of index slots that STACK_INIT still takes from xalloca();
 * above this the slot array is taken from xmalloc(). */
#define MAX_PTR_NUM 100

/*
 * Sets up the two work areas of a match:
 *   - alloc_addr: room for ptr_num OnigStackIndex slots;
 *   - stk_alloc / stk_base / stk / stk_end: the backtrack stack, either the
 *     one saved in msa->stack_p (msa->stack_n entries) or a fresh xalloca()
 *     block of stack_num entries.
 *
 * heap_addr receives the xmalloc()ed slot array when one was needed and
 * NULL otherwise, so the caller knows what to xfree().
 *
 * Expects msa, stk_alloc, stk_base, stk and stk_end in the enclosing scope.
 * If xmalloc() fails, the macro returns ONIGERR_MEMORY from the enclosing
 * function (via CHECK_NULL_RETURN_MEMERR) instead of handing out a NULL
 * slot array.  NOTE(review): this assumes the enclosing function holds
 * nothing that needs releasing when STACK_INIT runs -- confirm at call sites.
 */
#define STACK_INIT(alloc_addr, heap_addr, ptr_num, stack_num)  do {\
  if ((ptr_num) > MAX_PTR_NUM) {\
    alloc_addr = (char* )xmalloc(sizeof(OnigStackIndex) * (ptr_num));\
    CHECK_NULL_RETURN_MEMERR(alloc_addr);\
    heap_addr  = alloc_addr;\
    if (msa->stack_p) {\
      stk_alloc = (OnigStackType* )(msa->stack_p);\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + msa->stack_n;\
    } else {\
      stk_alloc = (OnigStackType* )xalloca(sizeof(OnigStackType) * (stack_num));\
      stk_base  = stk_alloc;\
      stk       = stk_base;\
      stk_end   = stk_base + (stack_num);\
    }\
  } else if (msa->stack_p) {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(msa->stack_p);\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + msa->stack_n;\
  }\
  else {\
    alloc_addr = (char* )xalloca(sizeof(OnigStackIndex) * (ptr_num)\
                       + sizeof(OnigStackType) * (stack_num));\
    heap_addr  = NULL;\
    stk_alloc  = (OnigStackType* )(alloc_addr + sizeof(OnigStackIndex) * (ptr_num));\
    stk_base   = stk_alloc;\
    stk        = stk_base;\
    stk_end    = stk_base + (stack_num);\
  }\
} while(0)
510
/* When the stack no longer lives in its initial block (stk_base differs
 * from stk_alloc), records its base and entry count in msa.
 * Uses stk_base, stk_end, stk_alloc and msa from the enclosing scope. */
#define STACK_SAVE do {\
  if (stk_alloc != stk_base) {\
    msa->stack_n = stk_end - stk_base; /* TODO: check overflow */\
    msa->stack_p = stk_base;\
  }\
} while(0)
517
518static unsigned int MatchStackLimitSize = DEFAULT_MATCH_STACK_LIMIT_SIZE;
519
520extern unsigned int
521onig_get_match_stack_limit_size(void)
522{
523 return MatchStackLimitSize;
524}
525
526extern int
527onig_set_match_stack_limit_size(unsigned int size)
528{
529 MatchStackLimitSize = size;
530 return 0;
531}
532
533static int
534stack_double(OnigStackType** arg_stk_base, OnigStackType** arg_stk_end,
535 OnigStackType** arg_stk, OnigStackType* stk_alloc, OnigMatchArg* msa)
536{
537 size_t n;
538 OnigStackType *x, *stk_base, *stk_end, *stk;
539
540 stk_base = *arg_stk_base;
541 stk_end = *arg_stk_end;
542 stk = *arg_stk;
543
544 n = stk_end - stk_base;
545 if (stk_base == stk_alloc && IS_NULL(msa->stack_p)) {
546 x = (OnigStackType* )xmalloc(sizeof(OnigStackType) * n * 2);
547 if (IS_NULL(x)) {
548 STACK_SAVE;
549 return ONIGERR_MEMORY;
550 }
551 xmemcpy(x, stk_base, n * sizeof(OnigStackType));
552 n *= 2;
553 }
554 else {
555 unsigned int limit_size = MatchStackLimitSize;
556 n *= 2;
557 if (limit_size != 0 && n > limit_size) {
558 if ((unsigned int )(stk_end - stk_base) == limit_size)
559 return ONIGERR_MATCH_STACK_LIMIT_OVER;
560 else
561 n = limit_size;
562 }
563 x = (OnigStackType* )xrealloc(stk_base, sizeof(OnigStackType) * n);
564 if (IS_NULL(x)) {
565 STACK_SAVE;
566 return ONIGERR_MEMORY;
567 }
568 }
569 *arg_stk = x + (stk - stk_base);
570 *arg_stk_base = x;
571 *arg_stk_end = x + n;
572 return 0;
573}
574
/* Makes sure at least n free entries remain above stk, growing the stack
 * with stack_double() when necessary.  On failure the stack is saved, the
 * heap slot array (xmalloc_base) is freed if present, and the error code is
 * returned from the enclosing function. */
#define STACK_ENSURE(n) do {\
  if (stk_end - stk < (n)) {\
    int grow_status = stack_double(&stk_base, &stk_end, &stk, stk_alloc, msa);\
    if (grow_status != 0) {\
      STACK_SAVE;\
      if (xmalloc_base) xfree(xmalloc_base);\
      return grow_status;\
    }\
  }\
} while(0)
585
/* Conversions between stack entry pointers and indexes relative to stk_base. */
#define STACK_AT(index)        (stk_base + (index))
#define GET_STACK_INDEX(entry) ((entry) - stk_base)

/* Pushes an entry that carries nothing but its type. */
#define STACK_PUSH_TYPE(stack_type) do {\
  STACK_ENSURE(1);\
  stk->type = (stack_type);\
  STACK_INC;\
} while(0)

/* True for entry types that share a bit with STK_MASK_TO_VOID_TARGET. */
#define IS_TO_VOID_TARGET(entry) (((entry)->type & STK_MASK_TO_VOID_TARGET) != 0)
596
#ifdef USE_COMBINATION_EXPLOSION_CHECK
/* Bit number in state_check_buff for check state snum at string position s
 * (uses str and num_comb_exp_check from the enclosing scope). */
# define STATE_CHECK_POS(s,snum) \
  (((s) - str) * num_comb_exp_check + ((snum) - 1))

/* Reads the bit for state snum at the current position s into v;
 * v is 0 when no check buffer is in use. */
# define STATE_CHECK_VAL(v,snum) do {\
  if (state_check_buff == NULL) (v) = 0;\
  else {\
    int x = STATE_CHECK_POS(s,snum);\
    (v) = state_check_buff[x/8] & (1<<(x%8));\
  }\
} while(0)


/* Extra branch for the pop loops: popping a STK_STATE_CHECK_MARK entry
 * sets the bit recorded in it. */
# define ELSE_IF_STATE_CHECK_MARK(stk) \
  else if ((stk)->type == STK_STATE_CHECK_MARK) { \
    int x = STATE_CHECK_POS(stk->u.state.pstr, stk->u.state.state_check);\
    state_check_buff[x/8] |= (1<<(x%8)); \
  }

/* Pushes a state entry: pattern position, string position, previous
 * string position and keep position; state_check is 0. */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type                = (stack_type);\
  stk->u.state.pcode       = (pat);\
  stk->u.state.pstr        = (s);\
  stk->u.state.pstr_prev   = (sprev);\
  stk->u.state.state_check = 0;\
  stk->u.state.pkeep       = (keep);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without STACK_ENSURE and without string positions;
 * the caller must already have made room. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type                = (stack_type);\
  stk->u.state.pcode       = (pat);\
  stk->u.state.state_check = 0;\
  STACK_INC;\
} while(0)

/* Pushes a STK_ALT entry that remembers check state snum (0 when no check
 * buffer is in use). */
# define STACK_PUSH_ALT_WITH_STATE_CHECK(pat,s,sprev,snum,keep) do {\
  STACK_ENSURE(1);\
  stk->type                = STK_ALT;\
  stk->u.state.pcode       = (pat);\
  stk->u.state.pstr        = (s);\
  stk->u.state.pstr_prev   = (sprev);\
  stk->u.state.state_check = ((state_check_buff != NULL) ? (snum) : 0);\
  stk->u.state.pkeep       = (keep);\
  STACK_INC;\
} while(0)

/* Pushes a STK_STATE_CHECK_MARK entry; does nothing without a check buffer. */
# define STACK_PUSH_STATE_CHECK(s,snum) do {\
  if (state_check_buff != NULL) {\
    STACK_ENSURE(1);\
    stk->type                = STK_STATE_CHECK_MARK;\
    stk->u.state.pstr        = (s);\
    stk->u.state.state_check = (snum);\
    STACK_INC;\
  }\
} while(0)

#else /* USE_COMBINATION_EXPLOSION_CHECK */

/* No state checking compiled in: the pop loops get no extra branch. */
# define ELSE_IF_STATE_CHECK_MARK(stk)

/* Pushes a state entry: pattern position, string position, previous
 * string position and keep position. */
# define STACK_PUSH(stack_type,pat,s,sprev,keep) do {\
  STACK_ENSURE(1);\
  stk->type              = (stack_type);\
  stk->u.state.pcode     = (pat);\
  stk->u.state.pstr      = (s);\
  stk->u.state.pstr_prev = (sprev);\
  stk->u.state.pkeep     = (keep);\
  STACK_INC;\
} while(0)

/* Like STACK_PUSH but without STACK_ENSURE and without string positions;
 * the caller must already have made room. */
# define STACK_PUSH_ENSURED(stack_type,pat) do {\
  stk->type          = (stack_type);\
  stk->u.state.pcode = (pat);\
  STACK_INC;\
} while(0)
#endif /* USE_COMBINATION_EXPLOSION_CHECK */
674
/* Typed shorthands for STACK_PUSH / STACK_PUSH_TYPE. */
#define STACK_PUSH_ALT(pat,s,sprev,keep) \
  STACK_PUSH(STK_ALT,pat,s,sprev,keep)
#define STACK_PUSH_POS(s,sprev,keep) \
  STACK_PUSH(STK_POS,NULL_UCHARP,s,sprev,keep)
#define STACK_PUSH_POS_NOT(pat,s,sprev,keep) \
  STACK_PUSH(STK_POS_NOT,pat,s,sprev,keep)
#define STACK_PUSH_ABSENT   STACK_PUSH_TYPE(STK_ABSENT)
#define STACK_PUSH_STOP_BT  STACK_PUSH_TYPE(STK_STOP_BT)
#define STACK_PUSH_LOOK_BEHIND_NOT(pat,s,sprev,keep) \
  STACK_PUSH(STK_LOOK_BEHIND_NOT,pat,s,sprev,keep)
682
/* Pushes a STK_REPEAT entry for repeat `id` with its counter at 0. */
#define STACK_PUSH_REPEAT(id, pat) do {\
  STACK_ENSURE(1);\
  stk->type           = STK_REPEAT;\
  stk->u.repeat.num   = (id);\
  stk->u.repeat.pcode = (pat);\
  stk->u.repeat.count = 0;\
  STACK_INC;\
} while(0)

/* Pushes a STK_REPEAT_INC entry holding the stack index of the STK_REPEAT
 * entry it belongs to (the pop loops decrement that entry's count). */
#define STACK_PUSH_REPEAT_INC(sindex) do {\
  STACK_ENSURE(1);\
  stk->type            = STK_REPEAT_INC;\
  stk->u.repeat_inc.si = (sindex);\
  STACK_INC;\
} while(0)

/* Pushes a STK_MEM_START entry for group mnum at position s.  The previous
 * mem_start_stk/mem_end_stk values are saved in the entry; the start slot
 * then points at this entry and the end slot becomes INVALID_STACK_INDEX. */
#define STACK_PUSH_MEM_START(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type        = STK_MEM_START;\
  stk->u.mem.num   = (mnum);\
  stk->u.mem.pstr  = (s);\
  stk->u.mem.start = mem_start_stk[mnum];\
  stk->u.mem.end   = mem_end_stk[mnum];\
  mem_start_stk[mnum] = GET_STACK_INDEX(stk);\
  mem_end_stk[mnum]   = INVALID_STACK_INDEX;\
  STACK_INC;\
} while(0)

/* Pushes a STK_MEM_END entry for group mnum at position s.  The previous
 * mem_start_stk/mem_end_stk values are saved in the entry and the end slot
 * then points at this entry. */
#define STACK_PUSH_MEM_END(mnum, s) do {\
  STACK_ENSURE(1);\
  stk->type        = STK_MEM_END;\
  stk->u.mem.num   = (mnum);\
  stk->u.mem.pstr  = (s);\
  stk->u.mem.start = mem_start_stk[mnum];\
  stk->u.mem.end   = mem_end_stk[mnum];\
  mem_end_stk[mnum] = GET_STACK_INDEX(stk);\
  STACK_INC;\
} while(0)

/* Pushes a STK_MEM_END_MARK entry that only records the group number. */
#define STACK_PUSH_MEM_END_MARK(mnum) do {\
  STACK_ENSURE(1);\
  stk->type      = STK_MEM_END_MARK;\
  stk->u.mem.num = (mnum);\
  STACK_INC;\
} while(0)
728
/* Walks down from the stack top and leaves k on the STK_MEM_START entry of
 * group mnum that is not paired with an end (or end mark) entry seen above
 * it.  If none is found the walk stops with k == stk_base. */
#define STACK_GET_MEM_START(mnum, k) do {\
  int level = 0;\
  for (k = stk; k > stk_base; ) {\
    k--;\
    if ((k->type & STK_MASK_MEM_END_OR_MARK) != 0 \
      && k->u.mem.num == (mnum)) {\
      level++;\
    }\
    else if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) break;\
      level--;\
    }\
  }\
} while(0)

/* Walks up from k towards the stack top.  `start` receives the position of
 * the outermost STK_MEM_START of group mnum, `end` the position of the
 * STK_MEM_END that closes it; the walk stops on that end entry. */
#define STACK_GET_MEM_RANGE(k, mnum, start, end) do {\
  int level = 0;\
  for ( ; k < stk; k++) {\
    if (k->type == STK_MEM_START && k->u.mem.num == (mnum)) {\
      if (level == 0) (start) = k->u.mem.pstr;\
      level++;\
    }\
    else if (k->type == STK_MEM_END && k->u.mem.num == (mnum)) {\
      level--;\
      if (level == 0) {\
        (end) = k->u.mem.pstr;\
        break;\
      }\
    }\
  }\
} while(0)
762
/* Pushes a STK_NULL_CHECK_START entry: check id cnum, string position s. */
#define STACK_PUSH_NULL_CHECK_START(cnum, s) do {\
  STACK_ENSURE(1);\
  stk->type              = STK_NULL_CHECK_START;\
  stk->u.null_check.num  = (cnum);\
  stk->u.null_check.pstr = (s);\
  STACK_INC;\
} while(0)

/* Pushes a STK_NULL_CHECK_END entry for check id cnum. */
#define STACK_PUSH_NULL_CHECK_END(cnum) do {\
  STACK_ENSURE(1);\
  stk->type             = STK_NULL_CHECK_END;\
  stk->u.null_check.num = (cnum);\
  STACK_INC;\
} while(0)

/* Pushes a STK_CALL_FRAME entry holding the return address pat. */
#define STACK_PUSH_CALL_FRAME(pat) do {\
  STACK_ENSURE(1);\
  stk->type                  = STK_CALL_FRAME;\
  stk->u.call_frame.ret_addr = (pat);\
  STACK_INC;\
} while(0)

/* Pushes a STK_RETURN entry. */
#define STACK_PUSH_RETURN do {\
  STACK_ENSURE(1);\
  stk->type = STK_RETURN;\
  STACK_INC;\
} while(0)

/* Pushes a STK_ABSENT_POS entry holding the two string positions. */
#define STACK_PUSH_ABSENT_POS(start, end) do {\
  STACK_ENSURE(1);\
  stk->type                  = STK_ABSENT_POS;\
  stk->u.absent_pos.abs_pstr = (start);\
  stk->u.absent_pos.end_pstr = (end);\
  STACK_INC;\
} while(0)
798
799
/* Debug builds report a stack pointer that moved below stk_base and jump to
 * stack_error; other builds compile the check away. */
#ifdef ONIG_DEBUG
# define STACK_BASE_CHECK(p, at) \
  if ((p) < stk_base) {\
    fprintf(stderr, "at %s\n", at);\
    goto stack_error;\
  }
#else
# define STACK_BASE_CHECK(p, at)
#endif

/* Drops the top stack entry without looking at it. */
#define STACK_POP_ONE do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ONE"); \
} while(0)
814
/*
 * Pops entries until one whose type has a bit in STK_MASK_POP_USED is on
 * top; stk is left pointing at that entry.
 *
 * How much bookkeeping is undone on the way depends on pop_level:
 *   STACK_POP_LEVEL_FREE       nothing is restored;
 *   STACK_POP_LEVEL_MEM_START  STK_MEM_START entries restore the saved
 *                              mem_start_stk / mem_end_stk values;
 *   anything else              STK_MEM_START and STK_MEM_END entries
 *                              restore those values and STK_REPEAT_INC
 *                              entries decrement their repeat counter.
 * ELSE_IF_STATE_CHECK_MARK is applied at every level.
 */
#define STACK_POP  do {\
  switch (pop_level) {\
  case STACK_POP_LEVEL_FREE:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  case STACK_POP_LEVEL_MEM_START:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 2"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  default:\
    for (;;) {\
      stk--;\
      STACK_BASE_CHECK(stk, "STACK_POP 3"); \
      if ((stk->type & STK_MASK_POP_USED) != 0)  break;\
      else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
        mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
        mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
      }\
      else if (stk->type == STK_REPEAT_INC) {\
        STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
      }\
      ELSE_IF_STATE_CHECK_MARK(stk);\
    }\
    break;\
  }\
} while(0)
858
/*
 * Shared body of the STACK_POP_TIL_* macros: pops entries until one of type
 * stop_type is on top (stk is left pointing at it).  On the way
 * STK_MEM_START and STK_MEM_END entries restore the saved mem_start_stk /
 * mem_end_stk values, STK_REPEAT_INC entries decrement their repeat
 * counter and ELSE_IF_STATE_CHECK_MARK is applied.  `at` is the label handed
 * to STACK_BASE_CHECK.
 */
#define STACK_POP_TIL_TYPE(stop_type, at) do {\
  for (;;) {\
    stk--;\
    STACK_BASE_CHECK(stk, at); \
    if (stk->type == (stop_type)) break;\
    else if (stk->type == STK_MEM_START || stk->type == STK_MEM_END) {\
      mem_start_stk[stk->u.mem.num] = stk->u.mem.start;\
      mem_end_stk[stk->u.mem.num]   = stk->u.mem.end;\
    }\
    else if (stk->type == STK_REPEAT_INC) {\
      STACK_AT(stk->u.repeat_inc.si)->u.repeat.count--;\
    }\
    ELSE_IF_STATE_CHECK_MARK(stk);\
  }\
} while(0)

/* Pop down to the nearest STK_POS_NOT entry. */
#define STACK_POP_TIL_POS_NOT \
  STACK_POP_TIL_TYPE(STK_POS_NOT, "STACK_POP_TIL_POS_NOT")

/* Pop down to the nearest STK_LOOK_BEHIND_NOT entry. */
#define STACK_POP_TIL_LOOK_BEHIND_NOT \
  STACK_POP_TIL_TYPE(STK_LOOK_BEHIND_NOT, "STACK_POP_TIL_LOOK_BEHIND_NOT")

/* Pop down to the nearest STK_ABSENT entry. */
#define STACK_POP_TIL_ABSENT \
  STACK_POP_TIL_TYPE(STK_ABSENT, "STACK_POP_TIL_ABSENT")
918
/* Pops the top entry and reads the two positions stored in its absent_pos
 * fields into start and end.  The entry type is not checked here. */
#define STACK_POP_ABSENT_POS(start, end) do {\
  stk--;\
  STACK_BASE_CHECK(stk, "STACK_POP_ABSENT_POS"); \
  (start) = stk->u.absent_pos.abs_pstr;\
  (end)   = stk->u.absent_pos.end_pstr;\
} while(0)
925
/* Walks down from the stack top turning every IS_TO_VOID_TARGET entry into
 * STK_VOID, up to and including the nearest STK_POS entry.  k is left on
 * that (now void) entry. */
#define STACK_POS_END(k) do {\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_POS_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_POS) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)

/* Same walk as STACK_POS_END, but it ends at the nearest STK_STOP_BT entry
 * and uses a private cursor. */
#define STACK_STOP_BT_END do {\
  OnigStackType *k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_STOP_BT_END"); \
    if (IS_TO_VOID_TARGET(k)) {\
      k->type = STK_VOID;\
    }\
    else if (k->type == STK_STOP_BT) {\
      k->type = STK_VOID;\
      break;\
    }\
  }\
} while(0)
955
/* Finds the nearest STK_NULL_CHECK_START entry with check id `id` and sets
 * isnull to 1 when the position stored there equals s, 0 otherwise. */
#define STACK_NULL_CHECK(isnull,id,s) do {\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK"); \
    if (k->type == STK_NULL_CHECK_START && k->u.null_check.num == (id)) {\
      (isnull) = (k->u.null_check.pstr == (s));\
      break;\
    }\
  }\
} while(0)
969
/*
 * Variant of STACK_NULL_CHECK that skips check frames which have already
 * been closed by a STK_NULL_CHECK_END entry.
 *
 * Walking down from the stack top, every STK_NULL_CHECK_END with check id
 * `id` raises the nesting level and every STK_NULL_CHECK_START with that id
 * lowers it again; the first matching START seen at level 0 decides the
 * result: isnull is 1 when its stored position equals s, 0 otherwise.
 *
 * Only END entries carrying the same id are counted, as in
 * STACK_NULL_CHECK_MEMST_REC.  START entries of other ids are ignored, so
 * counting their END entries would leave the level unbalanced and make the
 * walk skip the START entry it is looking for.
 */
#define STACK_NULL_CHECK_REC(isnull,id,s) do {\
  int level = 0;\
  OnigStackType* k = stk;\
  while (1) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_NULL_CHECK_REC"); \
    if (k->type == STK_NULL_CHECK_START) {\
      if (k->u.null_check.num == (id)) {\
        if (level == 0) {\
          (isnull) = (k->u.null_check.pstr == (s));\
          break;\
        }\
        else level--;\
      }\
    }\
    else if (k->type == STK_NULL_CHECK_END) {\
      if (k->u.null_check.num == (id)) level++;\
    }\
  }\
} while(0)
990
/*
 * Null check that also looks at capture groups.
 *
 * Walks down from the stack top to the nearest STK_NULL_CHECK_START entry
 * with check id `id`.
 *   - If the position stored there differs from s, isnull is 0.
 *   - Otherwise isnull starts at 1 and the entries between that START and
 *     the stack top are scanned upwards.  For every STK_MEM_START entry:
 *       * a saved end of INVALID_STACK_INDEX gives isnull = 0 and ends the
 *         scan;
 *       * endp is taken from the stack entry at the saved end index when
 *         the group's bit is set in reg->bt_mem_end, and from the saved end
 *         value cast to a pointer otherwise;
 *       * if the position in the stack entry at the saved start index
 *         differs from endp, isnull = 0 and the scan ends;
 *       * otherwise, if endp differs from s, isnull becomes -1.
 *
 * Uses stk from the enclosing scope; `reg` must provide bt_mem_end.
 */
991#define STACK_NULL_CHECK_MEMST(isnull,id,s,reg) do {\
992 OnigStackType* k = stk;\
993 while (1) {\
994 k--;\
995 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST"); \
996 if (k->type == STK_NULL_CHECK_START) {\
997 if (k->u.null_check.num == (id)) {\
998 if (k->u.null_check.pstr != (s)) {\
999 (isnull) = 0;\
1000 break;\
1001 }\
1002 else {\
1003 UChar* endp;\
1004 (isnull) = 1;\
1005 while (k < stk) {\
1006 if (k->type == STK_MEM_START) {\
1007 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1008 (isnull) = 0; break;\
1009 }\
1010 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1011 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1012 else\
1013 endp = (UChar* )k->u.mem.end;\
1014 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1015 (isnull) = 0; break;\
1016 }\
1017 else if (endp != s) {\
1018 (isnull) = -1; /* empty, but position changed */ \
1019 }\
1020 }\
1021 k++;\
1022 }\
1023 break;\
1024 }\
1025 }\
1026 }\
1027 }\
1028} while(0)
1029
/*
 * Variant of STACK_NULL_CHECK_MEMST that skips check frames already closed
 * by a STK_NULL_CHECK_END entry.
 *
 * Walking down from the stack top, a STK_NULL_CHECK_END entry with check id
 * `id` raises `level`, and a STK_NULL_CHECK_START entry with that id lowers
 * it again when level is not 0.  The first matching START seen at level 0
 * is evaluated exactly as in STACK_NULL_CHECK_MEMST:
 *   - a stored position different from s gives isnull = 0;
 *   - otherwise isnull starts at 1 and the STK_MEM_START entries between
 *     that START and the stack top are scanned upwards; a saved end of
 *     INVALID_STACK_INDEX, or a saved start position different from endp,
 *     gives isnull = 0 and ends the scan, while an endp different from s
 *     sets isnull to -1.
 *
 * Uses stk from the enclosing scope; `reg` must provide bt_mem_end.
 */
1030#define STACK_NULL_CHECK_MEMST_REC(isnull,id,s,reg) do {\
1031 int level = 0;\
1032 OnigStackType* k = stk;\
1033 while (1) {\
1034 k--;\
1035 STACK_BASE_CHECK(k, "STACK_NULL_CHECK_MEMST_REC"); \
1036 if (k->type == STK_NULL_CHECK_START) {\
1037 if (k->u.null_check.num == (id)) {\
1038 if (level == 0) {\
1039 if (k->u.null_check.pstr != (s)) {\
1040 (isnull) = 0;\
1041 break;\
1042 }\
1043 else {\
1044 UChar* endp;\
1045 (isnull) = 1;\
1046 while (k < stk) {\
1047 if (k->type == STK_MEM_START) {\
1048 if (k->u.mem.end == INVALID_STACK_INDEX) {\
1049 (isnull) = 0; break;\
1050 }\
1051 if (BIT_STATUS_AT(reg->bt_mem_end, k->u.mem.num))\
1052 endp = STACK_AT(k->u.mem.end)->u.mem.pstr;\
1053 else\
1054 endp = (UChar* )k->u.mem.end;\
1055 if (STACK_AT(k->u.mem.start)->u.mem.pstr != endp) {\
1056 (isnull) = 0; break;\
1057 }\
1058 else if (endp != s) {\
1059 (isnull) = -1; /* empty, but position changed */ \
1060 }\
1061 }\
1062 k++;\
1063 }\
1064 break;\
1065 }\
1066 }\
1067 else {\
1068 level--;\
1069 }\
1070 }\
1071 }\
1072 else if (k->type == STK_NULL_CHECK_END) {\
1073 if (k->u.null_check.num == (id)) level++;\
1074 }\
1075 }\
1076} while(0)
1077
/* Leaves k on the nearest STK_REPEAT entry for repeat `id` found at call
 * level 0.  Walking down, a STK_RETURN entry raises the level and a
 * STK_CALL_FRAME entry lowers it. */
#define STACK_GET_REPEAT(id, k) do {\
  int level = 0;\
  k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_GET_REPEAT"); \
    if (k->type == STK_REPEAT) {\
      if (level == 0 && k->u.repeat.num == (id)) {\
        break;\
      }\
    }\
    else if (k->type == STK_CALL_FRAME) level--;\
    else if (k->type == STK_RETURN)     level++;\
  }\
} while(0)
1095
/* Stores in addr the return address of the nearest STK_CALL_FRAME entry
 * that has not been matched by a STK_RETURN entry above it. */
#define STACK_RETURN(addr)  do {\
  int level = 0;\
  OnigStackType* k = stk;\
  for (;;) {\
    k--;\
    STACK_BASE_CHECK(k, "STACK_RETURN"); \
    if (k->type == STK_RETURN) {\
      level++;\
    }\
    else if (k->type == STK_CALL_FRAME) {\
      if (level == 0) {\
        (addr) = k->u.call_frame.ret_addr;\
        break;\
      }\
      level--;\
    }\
  }\
} while(0)
1113
1114
/* Compares len bytes at s1 and s2, advancing both pointers and counting
 * len down; jumps to the `fail` label at the first difference. */
#define STRING_CMP(s1,s2,len) do {\
  for ( ; len-- > 0; ) {\
    if (*s1++ != *s2++) goto fail;\
  }\
} while(0)

/* Case-insensitive comparison through string_cmp_ic() (uses `encode` from
 * the enclosing scope); jumps to the `fail` label when it reports a
 * mismatch. */
#define STRING_CMP_IC(case_fold_flag,s1,ps2,len,text_end) do {\
  if (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) \
    goto fail; \
} while(0)
1125
1126static int string_cmp_ic(OnigEncoding enc, int case_fold_flag,
1127 UChar* s1, UChar** ps2, OnigDistance mblen, const UChar* text_end)
1128{
1129 UChar buf1[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1130 UChar buf2[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1131 UChar *p1, *p2, *end1, *s2;
1132 int len1, len2;
1133
1134 s2 = *ps2;
1135 end1 = s1 + mblen;
1136 while (s1 < end1) {
1137 len1 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s1, text_end, buf1);
1138 len2 = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &s2, text_end, buf2);
1139 if (len1 != len2) return 0;
1140 p1 = buf1;
1141 p2 = buf2;
1142 while (len1-- > 0) {
1143 if (*p1 != *p2) return 0;
1144 p1++;
1145 p2++;
1146 }
1147 }
1148
1149 *ps2 = s2;
1150 return 1;
1151}
1152
/*
 * STRING_CMP_VALUE(s1, s2, len, is_fail)
 *
 * Same byte-wise comparison as STRING_CMP, but instead of jumping it
 * reports the outcome in `is_fail`: 0 when all `len` bytes are equal,
 * 1 as soon as a differing byte is found.
 * s1, s2 and len are modified in place and must be lvalues.
 */
#define STRING_CMP_VALUE(s1,s2,len,is_fail) do {\
  for (is_fail = 0; len-- > 0; ) {\
    if (*s1++ != *s2++) {\
      is_fail = 1;\
      break;\
    }\
  }\
} while(0)
1161
/*
 * STRING_CMP_VALUE_IC(case_fold_flag, s1, ps2, len, text_end, is_fail)
 *
 * Case-folding comparison through string_cmp_ic(), using the `encode`
 * variable of the enclosing function.  Sets `is_fail` to 1 when
 * string_cmp_ic() returns 0 (mismatch) and to 0 otherwise.
 */
#define STRING_CMP_VALUE_IC(case_fold_flag,s1,ps2,len,text_end,is_fail) do {\
  is_fail = \
    (string_cmp_ic(encode, case_fold_flag, s1, ps2, len, text_end) == 0) ? 1 : 0; \
} while(0)
1168
1169
/*
 * Position predicates.  They refer to the variables `str` and `end` of the
 * function in which they are expanded.
 */
#define IS_EMPTY_STR        (str == end)
#define ON_STR_BEGIN(s)     ((s) == str)
#define ON_STR_END(s)       ((s) == end)

/*
 * Remaining-data checks for the current position `s`.
 *   DATA_ENSURE_CHECK1    true when at least one more byte is available
 *   DATA_ENSURE_CHECK(n)  true when at least n more bytes are available
 *   DATA_ENSURE(n)        jumps to the label `fail` unless n bytes remain
 *   ABSENT_END_POS        the limit used by the checks above
 * The limit is `end`, or `right_range` when
 * USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE is defined.
 */
#ifndef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# define ABSENT_END_POS        end
# define DATA_ENSURE_CHECK1    (s < end)
# define DATA_ENSURE_CHECK(n)  (s + (n) <= end)
# define DATA_ENSURE(n)        if (s + (n) > end) goto fail
#else /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
# define ABSENT_END_POS        right_range
# define DATA_ENSURE_CHECK1    (s < right_range)
# define DATA_ENSURE_CHECK(n)  (s + (n) <= right_range)
# define DATA_ENSURE(n)        if (s + (n) > right_range) goto fail
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */
1184
1185
1186#ifdef USE_CAPTURE_HISTORY
1187static int
1188make_capture_history_tree(OnigCaptureTreeNode* node, OnigStackType** kp,
1189 OnigStackType* stk_top, UChar* str, regex_t* reg)
1190{
1191 int n, r;
1192 OnigCaptureTreeNode* child;
1193 OnigStackType* k = *kp;
1194
1195 while (k < stk_top) {
1196 if (k->type == STK_MEM_START) {
1197 n = k->u.mem.num;
1198 if (n <= ONIG_MAX_CAPTURE_HISTORY_GROUP &&
1199 BIT_STATUS_AT(reg->capture_history, n) != 0) {
1200 child = history_node_new();
1201 CHECK_NULL_RETURN_MEMERR(child);
1202 child->group = n;
1203 child->beg = k->u.mem.pstr - str;
1204 r = history_tree_add_child(node, child);
1205 if (r != 0) {
1206 history_tree_free(child);
1207 return r;
1208 }
1209 *kp = (k + 1);
1210 r = make_capture_history_tree(child, kp, stk_top, str, reg);
1211 if (r != 0) return r;
1212
1213 k = *kp;
1214 child->end = k->u.mem.pstr - str;
1215 }
1216 }
1217 else if (k->type == STK_MEM_END) {
1218 if (k->u.mem.num == node->group) {
1219 node->end = k->u.mem.pstr - str;
1220 *kp = k;
1221 return 0;
1222 }
1223 }
1224 k++;
1225 }
1226
1227 return 1; /* 1: root node ending. */
1228}
1229#endif /* USE_CAPTURE_HISTORY */
1230
1231#ifdef USE_BACKREF_WITH_LEVEL
1232static int mem_is_in_memp(int mem, int num, UChar* memp)
1233{
1234 int i;
1235 MemNumType m;
1236
1237 for (i = 0; i < num; i++) {
1238 GET_MEMNUM_INC(m, memp);
1239 if (mem == (int )m) return 1;
1240 }
1241 return 0;
1242}
1243
1244static int backref_match_at_nested_level(regex_t* reg,
1245 OnigStackType* top, OnigStackType* stk_base,
1246 int ignore_case, int case_fold_flag,
1247 int nest, int mem_num, UChar* memp, UChar** s, const UChar* send)
1248{
1249 UChar *ss, *p, *pstart, *pend = NULL_UCHARP;
1250 int level;
1251 OnigStackType* k;
1252
1253 level = 0;
1254 k = top;
1255 k--;
1256 while (k >= stk_base) {
1257 if (k->type == STK_CALL_FRAME) {
1258 level--;
1259 }
1260 else if (k->type == STK_RETURN) {
1261 level++;
1262 }
1263 else if (level == nest) {
1264 if (k->type == STK_MEM_START) {
1265 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1266 pstart = k->u.mem.pstr;
1267 if (pend != NULL_UCHARP) {
1268 if (pend - pstart > send - *s) return 0; /* or goto next_mem; */
1269 p = pstart;
1270 ss = *s;
1271
1272 if (ignore_case != 0) {
1273 if (string_cmp_ic(reg->enc, case_fold_flag,
1274 pstart, &ss, pend - pstart, send) == 0)
1275 return 0; /* or goto next_mem; */
1276 }
1277 else {
1278 while (p < pend) {
1279 if (*p++ != *ss++) return 0; /* or goto next_mem; */
1280 }
1281 }
1282
1283 *s = ss;
1284 return 1;
1285 }
1286 }
1287 }
1288 else if (k->type == STK_MEM_END) {
1289 if (mem_is_in_memp(k->u.mem.num, mem_num, memp)) {
1290 pend = k->u.mem.pstr;
1291 }
1292 }
1293 }
1294 k--;
1295 }
1296
1297 return 0;
1298}
1299#endif /* USE_BACKREF_WITH_LEVEL */
1300
1301
1302#ifdef ONIG_DEBUG_STATISTICS
1303
/*
 * Clock selection for the per-opcode statistics.
 * Each variant provides the time stamps `ts` / `te`, GETTIME(t) to fill a
 * time stamp, and TIMEDIFF(te, ts) to compute the difference of two stamps.
 */
# if defined(_WIN32)

#  include <windows.h>
static LARGE_INTEGER ts;
static LARGE_INTEGER te;
static LARGE_INTEGER freq;
#  define GETTIME(t)         QueryPerformanceCounter(&(t))
#  define TIMEDIFF(te,ts)    (unsigned long )(((te).QuadPart - (ts).QuadPart) \
                               * 1000000 / freq.QuadPart)

# else /* _WIN32 */

#  define USE_TIMEOFDAY

#  if defined(USE_TIMEOFDAY)
#   ifdef HAVE_SYS_TIME_H
#    include <sys/time.h>
#   endif
#   ifdef HAVE_UNISTD_H
#    include <unistd.h>
#   endif
static struct timeval ts;
static struct timeval te;
#   define GETTIME(t)        gettimeofday(&(t), (struct timezone* )0)
#   define TIMEDIFF(te,ts)   (((te).tv_usec - (ts).tv_usec) + \
                              (((te).tv_sec - (ts).tv_sec)*1000000))
#  else /* USE_TIMEOFDAY */
#   ifdef HAVE_SYS_TIMES_H
#    include <sys/times.h>
#   endif
static struct tms ts;
static struct tms te;
#   define GETTIME(t)        times(&(t))
#   define TIMEDIFF(te,ts)   ((te).tms_utime - (ts).tms_utime)
#  endif /* USE_TIMEOFDAY */

# endif /* _WIN32 */
1335
1336static int OpCounter[256];
1337static int OpPrevCounter[256];
1338static unsigned long OpTime[256];
1339static int OpCurr = OP_FINISH;
1340static int OpPrevTarget = OP_FAIL;
1341static int MaxStackDepth = 0;
1342
1343# define MOP_IN(opcode) do {\
1344 if (opcode == OpPrevTarget) OpPrevCounter[OpCurr]++;\
1345 OpCurr = opcode;\
1346 OpCounter[opcode]++;\
1347 GETTIME(ts);\
1348} while(0)
1349
1350# define MOP_OUT do {\
1351 GETTIME(te);\
1352 OpTime[OpCurr] += TIMEDIFF(te, ts);\
1353} while(0)
1354
1355extern void
1356onig_statistics_init(void)
1357{
1358 int i;
1359 for (i = 0; i < 256; i++) {
1360 OpCounter[i] = OpPrevCounter[i] = 0; OpTime[i] = 0;
1361 }
1362 MaxStackDepth = 0;
1363# ifdef _WIN32
1364 QueryPerformanceFrequency(&freq);
1365# endif
1366}
1367
1368extern void
1369onig_print_statistics(FILE* f)
1370{
1371 int i;
1372 fprintf(f, " count prev time\n");
1373 for (i = 0; OnigOpInfo[i].opcode >= 0; i++) {
1374 fprintf(f, "%8d: %8d: %10lu: %s\n",
1375 OpCounter[i], OpPrevCounter[i], OpTime[i], OnigOpInfo[i].name);
1376 }
1377 fprintf(f, "\nmax stack depth: %d\n", MaxStackDepth);
1378}
1379
/*
 * STACK_INC (statistics build): advances the stack pointer `stk` by one
 * entry and records the distance from `stk_base` in MaxStackDepth whenever
 * it exceeds the value recorded so far.
 */
# define STACK_INC do {\
  ++stk;\
  if (MaxStackDepth < stk - stk_base) {\
    MaxStackDepth = stk - stk_base;\
  }\
} while(0)
1385
1386#else /* ONIG_DEBUG_STATISTICS */
/* Without statistics the opcode enter/leave hooks expand to nothing ... */
# define MOP_IN(opcode)
# define MOP_OUT

/* ... and advancing the stack pointer is a plain increment. */
# define STACK_INC     stk++
1391#endif /* ONIG_DEBUG_STATISTICS */
1392
1393
1394#ifdef ONIG_DEBUG_MATCH
1395static char *
1396stack_type_str(int stack_type)
1397{
1398 switch (stack_type) {
1399 case STK_ALT: return "Alt ";
1400 case STK_LOOK_BEHIND_NOT: return "LBNot ";
1401 case STK_POS_NOT: return "PosNot";
1402 case STK_MEM_START: return "MemS ";
1403 case STK_MEM_END: return "MemE ";
1404 case STK_REPEAT_INC: return "RepInc";
1405 case STK_STATE_CHECK_MARK: return "StChMk";
1406 case STK_NULL_CHECK_START: return "NulChS";
1407 case STK_NULL_CHECK_END: return "NulChE";
1408 case STK_MEM_END_MARK: return "MemEMk";
1409 case STK_POS: return "Pos ";
1410 case STK_STOP_BT: return "StopBt";
1411 case STK_REPEAT: return "Rep ";
1412 case STK_CALL_FRAME: return "Call ";
1413 case STK_RETURN: return "Ret ";
1414 case STK_VOID: return "Void ";
1415 case STK_ABSENT_POS: return "AbsPos";
1416 case STK_ABSENT: return "Absent";
1417 default: return " ";
1418 }
1419}
1420#endif
1421
1422/* match data(str - end) from position (sstart). */
1423/* if sstart == str then set sprev to NULL. */
1424static OnigPosition
1425match_at(regex_t* reg, const UChar* str, const UChar* end,
1426#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
1427 const UChar* right_range,
1428#endif
1429 const UChar* sstart, UChar* sprev, OnigMatchArg* msa)
1430{
1431 static const UChar FinishCode[] = { OP_FINISH };
1432
1433 int i, num_mem, pop_level;
1434 ptrdiff_t n, best_len;
1435 LengthType tlen, tlen2;
1436 MemNumType mem;
1437 RelAddrType addr;
1438 OnigOptionType option = reg->options;
1439 OnigEncoding encode = reg->enc;
1440 OnigCaseFoldType case_fold_flag = reg->case_fold_flag;
1441 UChar *s, *q, *sbegin;
1442 UChar *p = reg->p;
1443 UChar *pkeep;
1444 char *alloca_base;
1445 char *xmalloc_base = NULL;
1446 OnigStackType *stk_alloc, *stk_base, *stk, *stk_end;
1447 OnigStackType *stkp; /* used as any purpose. */
1448 OnigStackIndex si;
1449 OnigStackIndex *repeat_stk;
1450 OnigStackIndex *mem_start_stk, *mem_end_stk;
1451#ifdef USE_COMBINATION_EXPLOSION_CHECK
1452 int scv;
1453 unsigned char* state_check_buff = msa->state_check_buff;
1454 int num_comb_exp_check = reg->num_comb_exp_check;
1455#endif
1456
1457#if USE_TOKEN_THREADED_VM
1458# define OP_OFFSET 1
1459# define VM_LOOP JUMP;
1460# define VM_LOOP_END
1461# define CASE(x) L_##x: sbegin = s; OPCODE_EXEC_HOOK;
1462# define DEFAULT L_DEFAULT:
1463# define NEXT sprev = sbegin; JUMP
1464# define JUMP goto *oplabels[*p++]
1465
1466 static const void *oplabels[] = {
1467 &&L_OP_FINISH, /* matching process terminator (no more alternative) */
1468 &&L_OP_END, /* pattern code terminator (success end) */
1469
1470 &&L_OP_EXACT1, /* single byte, N = 1 */
1471 &&L_OP_EXACT2, /* single byte, N = 2 */
1472 &&L_OP_EXACT3, /* single byte, N = 3 */
1473 &&L_OP_EXACT4, /* single byte, N = 4 */
1474 &&L_OP_EXACT5, /* single byte, N = 5 */
1475 &&L_OP_EXACTN, /* single byte */
1476 &&L_OP_EXACTMB2N1, /* mb-length = 2 N = 1 */
1477 &&L_OP_EXACTMB2N2, /* mb-length = 2 N = 2 */
1478 &&L_OP_EXACTMB2N3, /* mb-length = 2 N = 3 */
1479 &&L_OP_EXACTMB2N, /* mb-length = 2 */
1480 &&L_OP_EXACTMB3N, /* mb-length = 3 */
1481 &&L_OP_EXACTMBN, /* other length */
1482
1483 &&L_OP_EXACT1_IC, /* single byte, N = 1, ignore case */
1484 &&L_OP_EXACTN_IC, /* single byte, ignore case */
1485
1486 &&L_OP_CCLASS,
1487 &&L_OP_CCLASS_MB,
1488 &&L_OP_CCLASS_MIX,
1489 &&L_OP_CCLASS_NOT,
1490 &&L_OP_CCLASS_MB_NOT,
1491 &&L_OP_CCLASS_MIX_NOT,
1492
1493 &&L_OP_ANYCHAR, /* "." */
1494 &&L_OP_ANYCHAR_ML, /* "." multi-line */
1495 &&L_OP_ANYCHAR_STAR, /* ".*" */
1496 &&L_OP_ANYCHAR_ML_STAR, /* ".*" multi-line */
1497 &&L_OP_ANYCHAR_STAR_PEEK_NEXT,
1498 &&L_OP_ANYCHAR_ML_STAR_PEEK_NEXT,
1499
1500 &&L_OP_WORD,
1501 &&L_OP_NOT_WORD,
1502 &&L_OP_WORD_BOUND,
1503 &&L_OP_NOT_WORD_BOUND,
1504# ifdef USE_WORD_BEGIN_END
1505 &&L_OP_WORD_BEGIN,
1506 &&L_OP_WORD_END,
1507# else
1508 &&L_DEFAULT,
1509 &&L_DEFAULT,
1510# endif
1511 &&L_OP_ASCII_WORD,
1512 &&L_OP_NOT_ASCII_WORD,
1513 &&L_OP_ASCII_WORD_BOUND,
1514 &&L_OP_NOT_ASCII_WORD_BOUND,
1515# ifdef USE_WORD_BEGIN_END
1516 &&L_OP_ASCII_WORD_BEGIN,
1517 &&L_OP_ASCII_WORD_END,
1518# else
1519 &&L_DEFAULT,
1520 &&L_DEFAULT,
1521# endif
1522
1523 &&L_OP_BEGIN_BUF,
1524 &&L_OP_END_BUF,
1525 &&L_OP_BEGIN_LINE,
1526 &&L_OP_END_LINE,
1527 &&L_OP_SEMI_END_BUF,
1528 &&L_OP_BEGIN_POSITION,
1529
1530 &&L_OP_BACKREF1,
1531 &&L_OP_BACKREF2,
1532 &&L_OP_BACKREFN,
1533 &&L_OP_BACKREFN_IC,
1534 &&L_OP_BACKREF_MULTI,
1535 &&L_OP_BACKREF_MULTI_IC,
1536# ifdef USE_BACKREF_WITH_LEVEL
1537 &&L_OP_BACKREF_WITH_LEVEL, /* \k<xxx+n>, \k<xxx-n> */
1538# else
1539 &&L_DEFAULT,
1540# endif
1541 &&L_OP_MEMORY_START,
1542 &&L_OP_MEMORY_START_PUSH, /* push back-tracker to stack */
1543 &&L_OP_MEMORY_END_PUSH, /* push back-tracker to stack */
1544# ifdef USE_SUBEXP_CALL
1545 &&L_OP_MEMORY_END_PUSH_REC, /* push back-tracker to stack */
1546# else
1547 &&L_DEFAULT,
1548# endif
1549 &&L_OP_MEMORY_END,
1550# ifdef USE_SUBEXP_CALL
1551 &&L_OP_MEMORY_END_REC, /* push marker to stack */
1552# else
1553 &&L_DEFAULT,
1554# endif
1555
1556 &&L_OP_KEEP,
1557
1558 &&L_OP_FAIL, /* pop stack and move */
1559 &&L_OP_JUMP,
1560 &&L_OP_PUSH,
1561 &&L_OP_POP,
1562# ifdef USE_OP_PUSH_OR_JUMP_EXACT
1563 &&L_OP_PUSH_OR_JUMP_EXACT1, /* if match exact then push, else jump. */
1564# else
1565 &&L_DEFAULT,
1566# endif
1567 &&L_OP_PUSH_IF_PEEK_NEXT, /* if match exact then push, else none. */
1568 &&L_OP_REPEAT, /* {n,m} */
1569 &&L_OP_REPEAT_NG, /* {n,m}? (non greedy) */
1570 &&L_OP_REPEAT_INC,
1571 &&L_OP_REPEAT_INC_NG, /* non greedy */
1572 &&L_OP_REPEAT_INC_SG, /* search and get in stack */
1573 &&L_OP_REPEAT_INC_NG_SG, /* search and get in stack (non greedy) */
1574 &&L_OP_NULL_CHECK_START, /* null loop checker start */
1575 &&L_OP_NULL_CHECK_END, /* null loop checker end */
1576# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
1577 &&L_OP_NULL_CHECK_END_MEMST, /* null loop checker end (with capture status) */
1578# else
1579 &&L_DEFAULT,
1580# endif
1581# ifdef USE_SUBEXP_CALL
1582 &&L_OP_NULL_CHECK_END_MEMST_PUSH, /* with capture status and push check-end */
1583# else
1584 &&L_DEFAULT,
1585# endif
1586
1587 &&L_OP_PUSH_POS, /* (?=...) start */
1588 &&L_OP_POP_POS, /* (?=...) end */
1589 &&L_OP_PUSH_POS_NOT, /* (?!...) start */
1590 &&L_OP_FAIL_POS, /* (?!...) end */
1591 &&L_OP_PUSH_STOP_BT, /* (?>...) start */
1592 &&L_OP_POP_STOP_BT, /* (?>...) end */
1593 &&L_OP_LOOK_BEHIND, /* (?<=...) start (no needs end opcode) */
1594 &&L_OP_PUSH_LOOK_BEHIND_NOT, /* (?<!...) start */
1595 &&L_OP_FAIL_LOOK_BEHIND_NOT, /* (?<!...) end */
1596 &&L_OP_PUSH_ABSENT_POS, /* (?~...) start */
1597 &&L_OP_ABSENT, /* (?~...) start of inner loop */
1598 &&L_OP_ABSENT_END, /* (?~...) end */
1599
1600# ifdef USE_SUBEXP_CALL
1601 &&L_OP_CALL, /* \g<name> */
1602 &&L_OP_RETURN,
1603# else
1604 &&L_DEFAULT,
1605 &&L_DEFAULT,
1606# endif
1607 &&L_OP_CONDITION,
1608
1609# ifdef USE_COMBINATION_EXPLOSION_CHECK
1610 &&L_OP_STATE_CHECK_PUSH, /* combination explosion check and push */
1611 &&L_OP_STATE_CHECK_PUSH_OR_JUMP, /* check ok -> push, else jump */
1612 &&L_OP_STATE_CHECK, /* check only */
1613# else
1614 &&L_DEFAULT,
1615 &&L_DEFAULT,
1616 &&L_DEFAULT,
1617# endif
1618# ifdef USE_COMBINATION_EXPLOSION_CHECK
1619 &&L_OP_STATE_CHECK_ANYCHAR_STAR,
1620 &&L_OP_STATE_CHECK_ANYCHAR_ML_STAR,
1621# else
1622 &&L_DEFAULT,
1623 &&L_DEFAULT,
1624# endif
1625 /* no need: IS_DYNAMIC_OPTION() == 0 */
1626# if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
1627 &&L_OP_SET_OPTION_PUSH, /* set option and push recover option */
1628 &&L_OP_SET_OPTION /* set option */
1629# else
1630 &&L_DEFAULT,
1631 &&L_DEFAULT
1632# endif
1633 };
1634#else /* USE_TOKEN_THREADED_VM */
1635
1636# define OP_OFFSET 0
1637# define VM_LOOP \
1638 while (1) { \
1639 OPCODE_EXEC_HOOK; \
1640 sbegin = s; \
1641 switch (*p++) {
1642# define VM_LOOP_END } sprev = sbegin; }
1643# define CASE(x) case x:
1644# define DEFAULT default:
1645# define NEXT break
1646# define JUMP continue; break
1647#endif /* USE_TOKEN_THREADED_VM */
1648
1649
1650#ifdef USE_SUBEXP_CALL
1651/* Stack #0 is used to store the pattern itself and used for (?R), \g<0>,
1652 etc. Additional space is required. */
1653# define ADD_NUMMEM 1
1654#else
1655/* Stack #0 not is used. */
1656# define ADD_NUMMEM 0
1657#endif
1658
1659 n = reg->num_repeat + (reg->num_mem + ADD_NUMMEM) * 2;
1660
1661 STACK_INIT(alloca_base, xmalloc_base, n, INIT_MATCH_STACK_SIZE);
1662 pop_level = reg->stack_pop_level;
1663 num_mem = reg->num_mem;
1664 repeat_stk = (OnigStackIndex* )alloca_base;
1665
1666 mem_start_stk = (OnigStackIndex* )(repeat_stk + reg->num_repeat);
1667 mem_end_stk = mem_start_stk + (num_mem + ADD_NUMMEM);
1668 {
1669 OnigStackIndex *pp = mem_start_stk;
1670 for (; pp < repeat_stk + n; pp += 2) {
1671 pp[0] = INVALID_STACK_INDEX;
1672 pp[1] = INVALID_STACK_INDEX;
1673 }
1674 }
1675#ifndef USE_SUBEXP_CALL
1676 mem_start_stk--; /* for index start from 1,
1677 mem_start_stk[1]..mem_start_stk[num_mem] */
1678 mem_end_stk--; /* for index start from 1,
1679 mem_end_stk[1]..mem_end_stk[num_mem] */
1680#endif
1681
1682#ifdef ONIG_DEBUG_MATCH
1683 fprintf(stderr, "match_at: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), start: %"PRIuPTR" (%p), sprev: %"PRIuPTR" (%p)\n",
1684 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )sstart, sstart, (uintptr_t )sprev, sprev);
1685 fprintf(stderr, "size: %d, start offset: %d\n",
1686 (int )(end - str), (int )(sstart - str));
1687 fprintf(stderr, "\n ofs> str stk:type addr:opcode\n");
1688#endif
1689
1690 STACK_PUSH_ENSURED(STK_ALT, (UChar* )FinishCode); /* bottom stack */
1691 best_len = ONIG_MISMATCH;
1692 s = (UChar* )sstart;
1693 pkeep = (UChar* )sstart;
1694
1695
1696#ifdef ONIG_DEBUG_MATCH
1697# define OPCODE_EXEC_HOOK \
1698 if (s) { \
1699 UChar *op, *q, *bp, buf[50]; \
1700 int len; \
1701 op = p - OP_OFFSET; \
1702 fprintf(stderr, "%4"PRIdPTR"> \"", (*op == OP_FINISH) ? (ptrdiff_t )-1 : s - str); \
1703 bp = buf; \
1704 q = s; \
1705 if (*op != OP_FINISH) { /* s may not be a valid pointer if OP_FINISH. */ \
1706 for (i = 0; i < 7 && q < end; i++) { \
1707 len = enclen(encode, q, end); \
1708 while (len-- > 0) *bp++ = *q++; \
1709 } \
1710 if (q < end) { xmemcpy(bp, "...", 3); bp += 3; } \
1711 } \
1712 xmemcpy(bp, "\"", 1); bp += 1; \
1713 *bp = 0; \
1714 fputs((char* )buf, stderr); \
1715 for (i = 0; i < 20 - (bp - buf); i++) fputc(' ', stderr); \
1716 fprintf(stderr, "%4"PRIdPTR":%s %4"PRIdPTR":", \
1717 stk - stk_base - 1, \
1718 (stk > stk_base) ? stack_type_str(stk[-1].type) : " ", \
1719 (op == FinishCode) ? (ptrdiff_t )-1 : op - reg->p); \
1720 onig_print_compiled_byte_code(stderr, op, reg->p+reg->used, NULL, encode); \
1721 fprintf(stderr, "\n"); \
1722 }
1723#else
1724# define OPCODE_EXEC_HOOK ((void) 0)
1725#endif
1726
1727
1728 VM_LOOP {
1729 CASE(OP_END) MOP_IN(OP_END);
1730 n = s - sstart;
1731 if (n > best_len) {
1732 OnigRegion* region;
1733#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1734 if (IS_FIND_LONGEST(option)) {
1735 if (n > msa->best_len) {
1736 msa->best_len = n;
1737 msa->best_s = (UChar* )sstart;
1738 }
1739 else
1740 goto end_best_len;
1741 }
1742#endif
1743 best_len = n;
1744 region = msa->region;
1745 if (region) {
1746 region->beg[0] = ((pkeep > s) ? s : pkeep) - str;
1747 region->end[0] = s - str;
1748 for (i = 1; i <= num_mem; i++) {
1749 if (mem_end_stk[i] != INVALID_STACK_INDEX) {
1750 if (BIT_STATUS_AT(reg->bt_mem_start, i))
1751 region->beg[i] = STACK_AT(mem_start_stk[i])->u.mem.pstr - str;
1752 else
1753 region->beg[i] = (UChar* )((void* )mem_start_stk[i]) - str;
1754
1755 region->end[i] = (BIT_STATUS_AT(reg->bt_mem_end, i)
1756 ? STACK_AT(mem_end_stk[i])->u.mem.pstr
1757 : (UChar* )((void* )mem_end_stk[i])) - str;
1758 }
1759 else {
1760 region->beg[i] = region->end[i] = ONIG_REGION_NOTPOS;
1761 }
1762 }
1763
1764#ifdef USE_CAPTURE_HISTORY
1765 if (reg->capture_history != 0) {
1766 int r;
1767 OnigCaptureTreeNode* node;
1768
1769 if (IS_NULL(region->history_root)) {
1770 region->history_root = node = history_node_new();
1771 CHECK_NULL_RETURN_MEMERR(node);
1772 }
1773 else {
1774 node = region->history_root;
1775 history_tree_clear(node);
1776 }
1777
1778 node->group = 0;
1779 node->beg = ((pkeep > s) ? s : pkeep) - str;
1780 node->end = s - str;
1781
1782 stkp = stk_base;
1783 r = make_capture_history_tree(region->history_root, &stkp,
1784 stk, (UChar* )str, reg);
1785 if (r < 0) {
1786 best_len = r; /* error code */
1787 goto finish;
1788 }
1789 }
1790#endif /* USE_CAPTURE_HISTORY */
1791 } /* if (region) */
1792 } /* n > best_len */
1793
1794#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
1795 end_best_len:
1796#endif
1797 MOP_OUT;
1798
1799 if (IS_FIND_CONDITION(option)) {
1800 if (IS_FIND_NOT_EMPTY(option) && s == sstart) {
1801 best_len = ONIG_MISMATCH;
1802 goto fail; /* for retry */
1803 }
1804 if (IS_FIND_LONGEST(option) && DATA_ENSURE_CHECK1) {
1805 goto fail; /* for retry */
1806 }
1807 }
1808
1809 /* default behavior: return first-matching result. */
1810 goto finish;
1811 NEXT;
1812
1813 CASE(OP_EXACT1) MOP_IN(OP_EXACT1);
1814 DATA_ENSURE(1);
1815 if (*p != *s) goto fail;
1816 p++; s++;
1817 MOP_OUT;
1818 NEXT;
1819
1820 CASE(OP_EXACT1_IC) MOP_IN(OP_EXACT1_IC);
1821 {
1822 int len;
1823 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1824
1825 DATA_ENSURE(1);
1826 len = ONIGENC_MBC_CASE_FOLD(encode,
1827 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1828 case_fold_flag,
1829 &s, end, lowbuf);
1830 DATA_ENSURE(0);
1831 q = lowbuf;
1832 while (len-- > 0) {
1833 if (*p != *q) {
1834 goto fail;
1835 }
1836 p++; q++;
1837 }
1838 }
1839 MOP_OUT;
1840 NEXT;
1841
1842 CASE(OP_EXACT2) MOP_IN(OP_EXACT2);
1843 DATA_ENSURE(2);
1844 if (*p != *s) goto fail;
1845 p++; s++;
1846 if (*p != *s) goto fail;
1847 sprev = s;
1848 p++; s++;
1849 MOP_OUT;
1850 JUMP;
1851
1852 CASE(OP_EXACT3) MOP_IN(OP_EXACT3);
1853 DATA_ENSURE(3);
1854 if (*p != *s) goto fail;
1855 p++; s++;
1856 if (*p != *s) goto fail;
1857 p++; s++;
1858 if (*p != *s) goto fail;
1859 sprev = s;
1860 p++; s++;
1861 MOP_OUT;
1862 JUMP;
1863
1864 CASE(OP_EXACT4) MOP_IN(OP_EXACT4);
1865 DATA_ENSURE(4);
1866 if (*p != *s) goto fail;
1867 p++; s++;
1868 if (*p != *s) goto fail;
1869 p++; s++;
1870 if (*p != *s) goto fail;
1871 p++; s++;
1872 if (*p != *s) goto fail;
1873 sprev = s;
1874 p++; s++;
1875 MOP_OUT;
1876 JUMP;
1877
1878 CASE(OP_EXACT5) MOP_IN(OP_EXACT5);
1879 DATA_ENSURE(5);
1880 if (*p != *s) goto fail;
1881 p++; s++;
1882 if (*p != *s) goto fail;
1883 p++; s++;
1884 if (*p != *s) goto fail;
1885 p++; s++;
1886 if (*p != *s) goto fail;
1887 p++; s++;
1888 if (*p != *s) goto fail;
1889 sprev = s;
1890 p++; s++;
1891 MOP_OUT;
1892 JUMP;
1893
1894 CASE(OP_EXACTN) MOP_IN(OP_EXACTN);
1895 GET_LENGTH_INC(tlen, p);
1896 DATA_ENSURE(tlen);
1897 while (tlen-- > 0) {
1898 if (*p++ != *s++) goto fail;
1899 }
1900 sprev = s - 1;
1901 MOP_OUT;
1902 JUMP;
1903
1904 CASE(OP_EXACTN_IC) MOP_IN(OP_EXACTN_IC);
1905 {
1906 int len;
1907 UChar *q, *endp, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
1908
1909 GET_LENGTH_INC(tlen, p);
1910 endp = p + tlen;
1911
1912 while (p < endp) {
1913 sprev = s;
1914 DATA_ENSURE(1);
1915 len = ONIGENC_MBC_CASE_FOLD(encode,
1916 /* DISABLE_CASE_FOLD_MULTI_CHAR(case_fold_flag), */
1917 case_fold_flag,
1918 &s, end, lowbuf);
1919 DATA_ENSURE(0);
1920 q = lowbuf;
1921 while (len-- > 0) {
1922 if (*p != *q) goto fail;
1923 p++; q++;
1924 }
1925 }
1926 }
1927
1928 MOP_OUT;
1929 JUMP;
1930
1931 CASE(OP_EXACTMB2N1) MOP_IN(OP_EXACTMB2N1);
1932 DATA_ENSURE(2);
1933 if (*p != *s) goto fail;
1934 p++; s++;
1935 if (*p != *s) goto fail;
1936 p++; s++;
1937 MOP_OUT;
1938 NEXT;
1939
1940 CASE(OP_EXACTMB2N2) MOP_IN(OP_EXACTMB2N2);
1941 DATA_ENSURE(4);
1942 if (*p != *s) goto fail;
1943 p++; s++;
1944 if (*p != *s) goto fail;
1945 p++; s++;
1946 sprev = s;
1947 if (*p != *s) goto fail;
1948 p++; s++;
1949 if (*p != *s) goto fail;
1950 p++; s++;
1951 MOP_OUT;
1952 JUMP;
1953
1954 CASE(OP_EXACTMB2N3) MOP_IN(OP_EXACTMB2N3);
1955 DATA_ENSURE(6);
1956 if (*p != *s) goto fail;
1957 p++; s++;
1958 if (*p != *s) goto fail;
1959 p++; s++;
1960 if (*p != *s) goto fail;
1961 p++; s++;
1962 if (*p != *s) goto fail;
1963 p++; s++;
1964 sprev = s;
1965 if (*p != *s) goto fail;
1966 p++; s++;
1967 if (*p != *s) goto fail;
1968 p++; s++;
1969 MOP_OUT;
1970 JUMP;
1971
1972 CASE(OP_EXACTMB2N) MOP_IN(OP_EXACTMB2N);
1973 GET_LENGTH_INC(tlen, p);
1974 DATA_ENSURE(tlen * 2);
1975 while (tlen-- > 0) {
1976 if (*p != *s) goto fail;
1977 p++; s++;
1978 if (*p != *s) goto fail;
1979 p++; s++;
1980 }
1981 sprev = s - 2;
1982 MOP_OUT;
1983 JUMP;
1984
1985 CASE(OP_EXACTMB3N) MOP_IN(OP_EXACTMB3N);
1986 GET_LENGTH_INC(tlen, p);
1987 DATA_ENSURE(tlen * 3);
1988 while (tlen-- > 0) {
1989 if (*p != *s) goto fail;
1990 p++; s++;
1991 if (*p != *s) goto fail;
1992 p++; s++;
1993 if (*p != *s) goto fail;
1994 p++; s++;
1995 }
1996 sprev = s - 3;
1997 MOP_OUT;
1998 JUMP;
1999
2000 CASE(OP_EXACTMBN) MOP_IN(OP_EXACTMBN);
2001 GET_LENGTH_INC(tlen, p); /* mb-len */
2002 GET_LENGTH_INC(tlen2, p); /* string len */
2003 tlen2 *= tlen;
2004 DATA_ENSURE(tlen2);
2005 while (tlen2-- > 0) {
2006 if (*p != *s) goto fail;
2007 p++; s++;
2008 }
2009 sprev = s - tlen;
2010 MOP_OUT;
2011 JUMP;
2012
2013 CASE(OP_CCLASS) MOP_IN(OP_CCLASS);
2014 DATA_ENSURE(1);
2015 if (BITSET_AT(((BitSetRef )p), *s) == 0) goto fail;
2016 p += SIZE_BITSET;
2017 s += enclen(encode, s, end); /* OP_CCLASS can match mb-code. \D, \S */
2018 MOP_OUT;
2019 NEXT;
2020
2021 CASE(OP_CCLASS_MB) MOP_IN(OP_CCLASS_MB);
2022 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) goto fail;
2023
2024 cclass_mb:
2025 GET_LENGTH_INC(tlen, p);
2026 {
2027 OnigCodePoint code;
2028 UChar *ss;
2029 int mb_len;
2030
2031 DATA_ENSURE(1);
2032 mb_len = enclen(encode, s, end);
2033 DATA_ENSURE(mb_len);
2034 ss = s;
2035 s += mb_len;
2036 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2037
2038#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2039 if (! onig_is_in_code_range(p, code)) goto fail;
2040#else
2041 q = p;
2042 ALIGNMENT_RIGHT(q);
2043 if (! onig_is_in_code_range(q, code)) goto fail;
2044#endif
2045 }
2046 p += tlen;
2047 MOP_OUT;
2048 NEXT;
2049
2050 CASE(OP_CCLASS_MIX) MOP_IN(OP_CCLASS_MIX);
2051 DATA_ENSURE(1);
2052 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2053 p += SIZE_BITSET;
2054 goto cclass_mb;
2055 }
2056 else {
2057 if (BITSET_AT(((BitSetRef )p), *s) == 0)
2058 goto fail;
2059
2060 p += SIZE_BITSET;
2061 GET_LENGTH_INC(tlen, p);
2062 p += tlen;
2063 s++;
2064 }
2065 MOP_OUT;
2066 NEXT;
2067
2068 CASE(OP_CCLASS_NOT) MOP_IN(OP_CCLASS_NOT);
2069 DATA_ENSURE(1);
2070 if (BITSET_AT(((BitSetRef )p), *s) != 0) goto fail;
2071 p += SIZE_BITSET;
2072 s += enclen(encode, s, end);
2073 MOP_OUT;
2074 NEXT;
2075
2076 CASE(OP_CCLASS_MB_NOT) MOP_IN(OP_CCLASS_MB_NOT);
2077 DATA_ENSURE(1);
2078 if (! ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2079 s++;
2080 GET_LENGTH_INC(tlen, p);
2081 p += tlen;
2082 goto cc_mb_not_success;
2083 }
2084
2085 cclass_mb_not:
2086 GET_LENGTH_INC(tlen, p);
2087 {
2088 OnigCodePoint code;
2089 UChar *ss;
2090 int mb_len = enclen(encode, s, end);
2091
2092 if (! DATA_ENSURE_CHECK(mb_len)) {
2093 DATA_ENSURE(1);
2094 s = (UChar* )end;
2095 p += tlen;
2096 goto cc_mb_not_success;
2097 }
2098
2099 ss = s;
2100 s += mb_len;
2101 code = ONIGENC_MBC_TO_CODE(encode, ss, s);
2102
2103#ifdef PLATFORM_UNALIGNED_WORD_ACCESS
2104 if (onig_is_in_code_range(p, code)) goto fail;
2105#else
2106 q = p;
2107 ALIGNMENT_RIGHT(q);
2108 if (onig_is_in_code_range(q, code)) goto fail;
2109#endif
2110 }
2111 p += tlen;
2112
2113 cc_mb_not_success:
2114 MOP_OUT;
2115 NEXT;
2116
2117 CASE(OP_CCLASS_MIX_NOT) MOP_IN(OP_CCLASS_MIX_NOT);
2118 DATA_ENSURE(1);
2119 if (ONIGENC_IS_MBC_HEAD(encode, s, end)) {
2120 p += SIZE_BITSET;
2121 goto cclass_mb_not;
2122 }
2123 else {
2124 if (BITSET_AT(((BitSetRef )p), *s) != 0)
2125 goto fail;
2126
2127 p += SIZE_BITSET;
2128 GET_LENGTH_INC(tlen, p);
2129 p += tlen;
2130 s++;
2131 }
2132 MOP_OUT;
2133 NEXT;
2134
2135 CASE(OP_ANYCHAR) MOP_IN(OP_ANYCHAR);
2136 DATA_ENSURE(1);
2137 n = enclen(encode, s, end);
2138 DATA_ENSURE(n);
2139 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2140 s += n;
2141 MOP_OUT;
2142 NEXT;
2143
2144 CASE(OP_ANYCHAR_ML) MOP_IN(OP_ANYCHAR_ML);
2145 DATA_ENSURE(1);
2146 n = enclen(encode, s, end);
2147 DATA_ENSURE(n);
2148 s += n;
2149 MOP_OUT;
2150 NEXT;
2151
2152 CASE(OP_ANYCHAR_STAR) MOP_IN(OP_ANYCHAR_STAR);
2153 while (DATA_ENSURE_CHECK1) {
2154 STACK_PUSH_ALT(p, s, sprev, pkeep);
2155 n = enclen(encode, s, end);
2156 DATA_ENSURE(n);
2157 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2158 sprev = s;
2159 s += n;
2160 }
2161 MOP_OUT;
2162 JUMP;
2163
2164 CASE(OP_ANYCHAR_ML_STAR) MOP_IN(OP_ANYCHAR_ML_STAR);
2165 while (DATA_ENSURE_CHECK1) {
2166 STACK_PUSH_ALT(p, s, sprev, pkeep);
2167 n = enclen(encode, s, end);
2168 if (n > 1) {
2169 DATA_ENSURE(n);
2170 sprev = s;
2171 s += n;
2172 }
2173 else {
2174 sprev = s;
2175 s++;
2176 }
2177 }
2178 MOP_OUT;
2179 JUMP;
2180
2181 CASE(OP_ANYCHAR_STAR_PEEK_NEXT) MOP_IN(OP_ANYCHAR_STAR_PEEK_NEXT);
2182 while (DATA_ENSURE_CHECK1) {
2183 if (*p == *s) {
2184 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2185 }
2186 n = enclen(encode, s, end);
2187 DATA_ENSURE(n);
2188 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2189 sprev = s;
2190 s += n;
2191 }
2192 p++;
2193 MOP_OUT;
2194 NEXT;
2195
2196 CASE(OP_ANYCHAR_ML_STAR_PEEK_NEXT)MOP_IN(OP_ANYCHAR_ML_STAR_PEEK_NEXT);
2197 while (DATA_ENSURE_CHECK1) {
2198 if (*p == *s) {
2199 STACK_PUSH_ALT(p + 1, s, sprev, pkeep);
2200 }
2201 n = enclen(encode, s, end);
2202 if (n > 1) {
2203 DATA_ENSURE(n);
2204 sprev = s;
2205 s += n;
2206 }
2207 else {
2208 sprev = s;
2209 s++;
2210 }
2211 }
2212 p++;
2213 MOP_OUT;
2214 NEXT;
2215
2216#ifdef USE_COMBINATION_EXPLOSION_CHECK
2217 CASE(OP_STATE_CHECK_ANYCHAR_STAR) MOP_IN(OP_STATE_CHECK_ANYCHAR_STAR);
2218 GET_STATE_CHECK_NUM_INC(mem, p);
2219 while (DATA_ENSURE_CHECK1) {
2220 STATE_CHECK_VAL(scv, mem);
2221 if (scv) goto fail;
2222
2223 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2224 n = enclen(encode, s, end);
2225 DATA_ENSURE(n);
2226 if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 0)) goto fail;
2227 sprev = s;
2228 s += n;
2229 }
2230 MOP_OUT;
2231 NEXT;
2232
2233 CASE(OP_STATE_CHECK_ANYCHAR_ML_STAR)
2234 MOP_IN(OP_STATE_CHECK_ANYCHAR_ML_STAR);
2235
2236 GET_STATE_CHECK_NUM_INC(mem, p);
2237 while (DATA_ENSURE_CHECK1) {
2238 STATE_CHECK_VAL(scv, mem);
2239 if (scv) goto fail;
2240
2241 STACK_PUSH_ALT_WITH_STATE_CHECK(p, s, sprev, mem, pkeep);
2242 n = enclen(encode, s, end);
2243 if (n > 1) {
2244 DATA_ENSURE(n);
2245 sprev = s;
2246 s += n;
2247 }
2248 else {
2249 sprev = s;
2250 s++;
2251 }
2252 }
2253 MOP_OUT;
2254 NEXT;
2255#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2256
2257 CASE(OP_WORD) MOP_IN(OP_WORD);
2258 DATA_ENSURE(1);
2259 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2260 goto fail;
2261
2262 s += enclen(encode, s, end);
2263 MOP_OUT;
2264 NEXT;
2265
2266 CASE(OP_ASCII_WORD) MOP_IN(OP_ASCII_WORD);
2267 DATA_ENSURE(1);
2268 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2269 goto fail;
2270
2271 s += enclen(encode, s, end);
2272 MOP_OUT;
2273 NEXT;
2274
2275 CASE(OP_NOT_WORD) MOP_IN(OP_NOT_WORD);
2276 DATA_ENSURE(1);
2277 if (ONIGENC_IS_MBC_WORD(encode, s, end))
2278 goto fail;
2279
2280 s += enclen(encode, s, end);
2281 MOP_OUT;
2282 NEXT;
2283
2284 CASE(OP_NOT_ASCII_WORD) MOP_IN(OP_NOT_ASCII_WORD);
2285 DATA_ENSURE(1);
2286 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2287 goto fail;
2288
2289 s += enclen(encode, s, end);
2290 MOP_OUT;
2291 NEXT;
2292
2293 CASE(OP_WORD_BOUND) MOP_IN(OP_WORD_BOUND);
2294 if (ON_STR_BEGIN(s)) {
2295 DATA_ENSURE(1);
2296 if (! ONIGENC_IS_MBC_WORD(encode, s, end))
2297 goto fail;
2298 }
2299 else if (ON_STR_END(s)) {
2300 if (! ONIGENC_IS_MBC_WORD(encode, sprev, end))
2301 goto fail;
2302 }
2303 else {
2304 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2305 == ONIGENC_IS_MBC_WORD(encode, sprev, end))
2306 goto fail;
2307 }
2308 MOP_OUT;
2309 JUMP;
2310
2311 CASE(OP_ASCII_WORD_BOUND) MOP_IN(OP_ASCII_WORD_BOUND);
2312 if (ON_STR_BEGIN(s)) {
2313 DATA_ENSURE(1);
2314 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2315 goto fail;
2316 }
2317 else if (ON_STR_END(s)) {
2318 if (! ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2319 goto fail;
2320 }
2321 else {
2322 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2323 == ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2324 goto fail;
2325 }
2326 MOP_OUT;
2327 JUMP;
2328
2329 CASE(OP_NOT_WORD_BOUND) MOP_IN(OP_NOT_WORD_BOUND);
2330 if (ON_STR_BEGIN(s)) {
2331 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end))
2332 goto fail;
2333 }
2334 else if (ON_STR_END(s)) {
2335 if (ONIGENC_IS_MBC_WORD(encode, sprev, end))
2336 goto fail;
2337 }
2338 else {
2339 if (ONIGENC_IS_MBC_WORD(encode, s, end)
2340 != ONIGENC_IS_MBC_WORD(encode, sprev, end))
2341 goto fail;
2342 }
2343 MOP_OUT;
2344 JUMP;
2345
2346 CASE(OP_NOT_ASCII_WORD_BOUND) MOP_IN(OP_NOT_ASCII_WORD_BOUND);
2347 if (ON_STR_BEGIN(s)) {
2348 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end))
2349 goto fail;
2350 }
2351 else if (ON_STR_END(s)) {
2352 if (ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2353 goto fail;
2354 }
2355 else {
2356 if (ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)
2357 != ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end))
2358 goto fail;
2359 }
2360 MOP_OUT;
2361 JUMP;
2362
2363#ifdef USE_WORD_BEGIN_END
2364 CASE(OP_WORD_BEGIN) MOP_IN(OP_WORD_BEGIN);
2365 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_WORD(encode, s, end)) {
2366 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2367 MOP_OUT;
2368 JUMP;
2369 }
2370 }
2371 goto fail;
2372 NEXT;
2373
2374 CASE(OP_ASCII_WORD_BEGIN) MOP_IN(OP_ASCII_WORD_BEGIN);
2375 if (DATA_ENSURE_CHECK1 && ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2376 if (ON_STR_BEGIN(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2377 MOP_OUT;
2378 JUMP;
2379 }
2380 }
2381 goto fail;
2382 NEXT;
2383
2384 CASE(OP_WORD_END) MOP_IN(OP_WORD_END);
2385 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_WORD(encode, sprev, end)) {
2386 if (ON_STR_END(s) || !ONIGENC_IS_MBC_WORD(encode, s, end)) {
2387 MOP_OUT;
2388 JUMP;
2389 }
2390 }
2391 goto fail;
2392 NEXT;
2393
2394 CASE(OP_ASCII_WORD_END) MOP_IN(OP_ASCII_WORD_END);
2395 if (!ON_STR_BEGIN(s) && ONIGENC_IS_MBC_ASCII_WORD(encode, sprev, end)) {
2396 if (ON_STR_END(s) || !ONIGENC_IS_MBC_ASCII_WORD(encode, s, end)) {
2397 MOP_OUT;
2398 JUMP;
2399 }
2400 }
2401 goto fail;
2402 NEXT;
2403#endif
2404
2405 CASE(OP_BEGIN_BUF) MOP_IN(OP_BEGIN_BUF);
2406 if (! ON_STR_BEGIN(s)) goto fail;
2407 if (IS_NOTBOS(msa->options)) goto fail;
2408
2409 MOP_OUT;
2410 JUMP;
2411
2412 CASE(OP_END_BUF) MOP_IN(OP_END_BUF);
2413 if (! ON_STR_END(s)) goto fail;
2414 if (IS_NOTEOS(msa->options)) goto fail;
2415
2416 MOP_OUT;
2417 JUMP;
2418
2419 CASE(OP_BEGIN_LINE) MOP_IN(OP_BEGIN_LINE);
2420 if (ON_STR_BEGIN(s)) {
2421 if (IS_NOTBOL(msa->options)) goto fail;
2422 MOP_OUT;
2423 JUMP;
2424 }
2425 else if (ONIGENC_IS_MBC_NEWLINE(encode, sprev, end)
2426#ifdef USE_CRNL_AS_LINE_TERMINATOR
2427 && !(IS_NEWLINE_CRLF(option)
2428 && ONIGENC_IS_MBC_CRNL(encode, sprev, end))
2429#endif
2430 && !ON_STR_END(s)) {
2431 MOP_OUT;
2432 JUMP;
2433 }
2434 goto fail;
2435 NEXT;
2436
2437 CASE(OP_END_LINE) MOP_IN(OP_END_LINE);
2438 if (ON_STR_END(s)) {
2439#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2440 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2441#endif
2442 if (IS_NOTEOL(msa->options)) goto fail;
2443 MOP_OUT;
2444 JUMP;
2445#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2446 }
2447#endif
2448 }
2449 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2450 MOP_OUT;
2451 JUMP;
2452 }
2453 goto fail;
2454 NEXT;
2455
2456 CASE(OP_SEMI_END_BUF) MOP_IN(OP_SEMI_END_BUF);
2457 if (ON_STR_END(s)) {
2458#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2459 if (IS_EMPTY_STR || !ONIGENC_IS_MBC_NEWLINE_EX(encode, sprev, str, end, option, 1)) {
2460#endif
2461 if (IS_NOTEOL(msa->options)) goto fail;
2462 MOP_OUT;
2463 JUMP;
2464#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
2465 }
2466#endif
2467 }
2468 else if (ONIGENC_IS_MBC_NEWLINE_EX(encode, s, str, end, option, 1)) {
2469 UChar* ss = s + enclen(encode, s, end);
2470 if (ON_STR_END(ss)) {
2471 MOP_OUT;
2472 JUMP;
2473 }
2474#ifdef USE_CRNL_AS_LINE_TERMINATOR
2475 else if (IS_NEWLINE_CRLF(option)
2476 && ONIGENC_IS_MBC_CRNL(encode, s, end)) {
2477 ss += enclen(encode, ss, end);
2478 if (ON_STR_END(ss)) {
2479 MOP_OUT;
2480 JUMP;
2481 }
2482 }
2483#endif
2484 }
2485 goto fail;
2486 NEXT;
2487
2488 CASE(OP_BEGIN_POSITION) MOP_IN(OP_BEGIN_POSITION);
2489 if (s != msa->gpos)
2490 goto fail;
2491
2492 MOP_OUT;
2493 JUMP;
2494
2495 CASE(OP_MEMORY_START_PUSH) MOP_IN(OP_MEMORY_START_PUSH);
2496 GET_MEMNUM_INC(mem, p);
2497 STACK_PUSH_MEM_START(mem, s);
2498 MOP_OUT;
2499 JUMP;
2500
2501 CASE(OP_MEMORY_START) MOP_IN(OP_MEMORY_START);
2502 GET_MEMNUM_INC(mem, p);
2503 mem_start_stk[mem] = (OnigStackIndex )((void* )s);
2504 MOP_OUT;
2505 JUMP;
2506
2507 CASE(OP_MEMORY_END_PUSH) MOP_IN(OP_MEMORY_END_PUSH);
2508 GET_MEMNUM_INC(mem, p);
2509 STACK_PUSH_MEM_END(mem, s);
2510 MOP_OUT;
2511 JUMP;
2512
2513 CASE(OP_MEMORY_END) MOP_IN(OP_MEMORY_END);
2514 GET_MEMNUM_INC(mem, p);
2515 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2516 MOP_OUT;
2517 JUMP;
2518
2519 CASE(OP_KEEP) MOP_IN(OP_KEEP);
2520 pkeep = s;
2521 MOP_OUT;
2522 JUMP;
2523
2524#ifdef USE_SUBEXP_CALL
2525 CASE(OP_MEMORY_END_PUSH_REC) MOP_IN(OP_MEMORY_END_PUSH_REC);
2526 GET_MEMNUM_INC(mem, p);
2527 STACK_GET_MEM_START(mem, stkp); /* should be before push mem-end. */
2528 STACK_PUSH_MEM_END(mem, s);
2529 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2530 MOP_OUT;
2531 JUMP;
2532
2533 CASE(OP_MEMORY_END_REC) MOP_IN(OP_MEMORY_END_REC);
2534 GET_MEMNUM_INC(mem, p);
2535 mem_end_stk[mem] = (OnigStackIndex )((void* )s);
2536 STACK_GET_MEM_START(mem, stkp);
2537
2538 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2539 mem_start_stk[mem] = GET_STACK_INDEX(stkp);
2540 else
2541 mem_start_stk[mem] = (OnigStackIndex )((void* )stkp->u.mem.pstr);
2542
2543 STACK_PUSH_MEM_END_MARK(mem);
2544 MOP_OUT;
2545 JUMP;
2546#endif
2547
2548 CASE(OP_BACKREF1) MOP_IN(OP_BACKREF1);
2549 mem = 1;
2550 goto backref;
2551 NEXT;
2552
2553 CASE(OP_BACKREF2) MOP_IN(OP_BACKREF2);
2554 mem = 2;
2555 goto backref;
2556 NEXT;
2557
2558 CASE(OP_BACKREFN) MOP_IN(OP_BACKREFN);
2559 GET_MEMNUM_INC(mem, p);
2560 backref:
2561 {
2562 int len;
2563 UChar *pstart, *pend;
2564
2565 /* if you want to remove following line,
2566 you should check in parse and compile time. */
2567 if (mem > num_mem) goto fail;
2568 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2569 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2570
2571 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2572 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2573 else
2574 pstart = (UChar* )((void* )mem_start_stk[mem]);
2575
2576 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2577 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2578 : (UChar* )((void* )mem_end_stk[mem]));
2579 n = pend - pstart;
2580 DATA_ENSURE(n);
2581 sprev = s;
2582 STRING_CMP(pstart, s, n);
2583 while (sprev + (len = enclen(encode, sprev, end)) < s)
2584 sprev += len;
2585
2586 MOP_OUT;
2587 JUMP;
2588 }
2589
2590 CASE(OP_BACKREFN_IC) MOP_IN(OP_BACKREFN_IC);
2591 GET_MEMNUM_INC(mem, p);
2592 {
2593 int len;
2594 UChar *pstart, *pend;
2595
2596 /* if you want to remove following line,
2597 you should check in parse and compile time. */
2598 if (mem > num_mem) goto fail;
2599 if (mem_end_stk[mem] == INVALID_STACK_INDEX) goto fail;
2600 if (mem_start_stk[mem] == INVALID_STACK_INDEX) goto fail;
2601
2602 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2603 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2604 else
2605 pstart = (UChar* )((void* )mem_start_stk[mem]);
2606
2607 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2608 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2609 : (UChar* )((void* )mem_end_stk[mem]));
2610 n = pend - pstart;
2611 DATA_ENSURE(n);
2612 sprev = s;
2613 STRING_CMP_IC(case_fold_flag, pstart, &s, (int)n, end);
2614 while (sprev + (len = enclen(encode, sprev, end)) < s)
2615 sprev += len;
2616
2617 MOP_OUT;
2618 JUMP;
2619 }
2620 NEXT;
2621
2622 CASE(OP_BACKREF_MULTI) MOP_IN(OP_BACKREF_MULTI);
2623 {
2624 int len, is_fail;
2625 UChar *pstart, *pend, *swork;
2626
2627 GET_LENGTH_INC(tlen, p);
2628 for (i = 0; i < tlen; i++) {
2629 GET_MEMNUM_INC(mem, p);
2630
2631 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2632 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2633
2634 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2635 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2636 else
2637 pstart = (UChar* )((void* )mem_start_stk[mem]);
2638
2639 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2640 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2641 : (UChar* )((void* )mem_end_stk[mem]));
2642 n = pend - pstart;
2643 DATA_ENSURE(n);
2644 sprev = s;
2645 swork = s;
2646 STRING_CMP_VALUE(pstart, swork, n, is_fail);
2647 if (is_fail) continue;
2648 s = swork;
2649 while (sprev + (len = enclen(encode, sprev, end)) < s)
2650 sprev += len;
2651
2652 p += (SIZE_MEMNUM * (tlen - i - 1));
2653 break; /* success */
2654 }
2655 if (i == tlen) goto fail;
2656 MOP_OUT;
2657 JUMP;
2658 }
2659 NEXT;
2660
2661 CASE(OP_BACKREF_MULTI_IC) MOP_IN(OP_BACKREF_MULTI_IC);
2662 {
2663 int len, is_fail;
2664 UChar *pstart, *pend, *swork;
2665
2666 GET_LENGTH_INC(tlen, p);
2667 for (i = 0; i < tlen; i++) {
2668 GET_MEMNUM_INC(mem, p);
2669
2670 if (mem_end_stk[mem] == INVALID_STACK_INDEX) continue;
2671 if (mem_start_stk[mem] == INVALID_STACK_INDEX) continue;
2672
2673 if (BIT_STATUS_AT(reg->bt_mem_start, mem))
2674 pstart = STACK_AT(mem_start_stk[mem])->u.mem.pstr;
2675 else
2676 pstart = (UChar* )((void* )mem_start_stk[mem]);
2677
2678 pend = (BIT_STATUS_AT(reg->bt_mem_end, mem)
2679 ? STACK_AT(mem_end_stk[mem])->u.mem.pstr
2680 : (UChar* )((void* )mem_end_stk[mem]));
2681 n = pend - pstart;
2682 DATA_ENSURE(n);
2683 sprev = s;
2684 swork = s;
2685 STRING_CMP_VALUE_IC(case_fold_flag, pstart, &swork, n, end, is_fail);
2686 if (is_fail) continue;
2687 s = swork;
2688 while (sprev + (len = enclen(encode, sprev, end)) < s)
2689 sprev += len;
2690
2691 p += (SIZE_MEMNUM * (tlen - i - 1));
2692 break; /* success */
2693 }
2694 if (i == tlen) goto fail;
2695 MOP_OUT;
2696 JUMP;
2697 }
2698
2699#ifdef USE_BACKREF_WITH_LEVEL
2700 CASE(OP_BACKREF_WITH_LEVEL)
2701 {
2702 int len;
2703 OnigOptionType ic;
2704 LengthType level;
2705
2706 GET_OPTION_INC(ic, p);
2707 GET_LENGTH_INC(level, p);
2708 GET_LENGTH_INC(tlen, p);
2709
2710 sprev = s;
2711 if (backref_match_at_nested_level(reg, stk, stk_base, ic,
2712 case_fold_flag, (int )level, (int )tlen, p, &s, end)) {
2713 while (sprev + (len = enclen(encode, sprev, end)) < s)
2714 sprev += len;
2715
2716 p += (SIZE_MEMNUM * tlen);
2717 }
2718 else
2719 goto fail;
2720
2721 MOP_OUT;
2722 JUMP;
2723 }
2724
2725#endif
2726
2727#if 0 /* no need: IS_DYNAMIC_OPTION() == 0 */
2728 CASE(OP_SET_OPTION_PUSH) MOP_IN(OP_SET_OPTION_PUSH);
2729 GET_OPTION_INC(option, p);
2730 STACK_PUSH_ALT(p, s, sprev, pkeep);
2731 p += SIZE_OP_SET_OPTION + SIZE_OP_FAIL;
2732 MOP_OUT;
2733 JUMP;
2734
2735 CASE(OP_SET_OPTION) MOP_IN(OP_SET_OPTION);
2736 GET_OPTION_INC(option, p);
2737 MOP_OUT;
2738 JUMP;
2739#endif
2740
2741 CASE(OP_NULL_CHECK_START) MOP_IN(OP_NULL_CHECK_START);
2742 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2743 STACK_PUSH_NULL_CHECK_START(mem, s);
2744 MOP_OUT;
2745 JUMP;
2746
2747 CASE(OP_NULL_CHECK_END) MOP_IN(OP_NULL_CHECK_END);
2748 {
2749 int isnull;
2750
2751 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2752 STACK_NULL_CHECK(isnull, mem, s);
2753 if (isnull) {
2754#ifdef ONIG_DEBUG_MATCH
2755 fprintf(stderr, "NULL_CHECK_END: skip id:%d, s:%"PRIuPTR" (%p)\n",
2756 (int )mem, (uintptr_t )s, s);
2757#endif
2758 null_check_found:
2759 /* empty loop founded, skip next instruction */
2760 switch (*p++) {
2761 case OP_JUMP:
2762 case OP_PUSH:
2763 p += SIZE_RELADDR;
2764 break;
2765 case OP_REPEAT_INC:
2766 case OP_REPEAT_INC_NG:
2767 case OP_REPEAT_INC_SG:
2768 case OP_REPEAT_INC_NG_SG:
2769 p += SIZE_MEMNUM;
2770 break;
2771 default:
2772 goto unexpected_bytecode_error;
2773 break;
2774 }
2775 }
2776 }
2777 MOP_OUT;
2778 JUMP;
2779
2780#ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2781 CASE(OP_NULL_CHECK_END_MEMST) MOP_IN(OP_NULL_CHECK_END_MEMST);
2782 {
2783 int isnull;
2784
2785 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2786 STACK_NULL_CHECK_MEMST(isnull, mem, s, reg);
2787 if (isnull) {
2788# ifdef ONIG_DEBUG_MATCH
2789 fprintf(stderr, "NULL_CHECK_END_MEMST: skip id:%d, s:%"PRIuPTR" (%p)\n",
2790 (int )mem, (uintptr_t )s, s);
2791# endif
2792 if (isnull == -1) goto fail;
2793 goto null_check_found;
2794 }
2795 }
2796 MOP_OUT;
2797 JUMP;
2798#endif
2799
2800#ifdef USE_SUBEXP_CALL
2801 CASE(OP_NULL_CHECK_END_MEMST_PUSH)
2802 MOP_IN(OP_NULL_CHECK_END_MEMST_PUSH);
2803 {
2804 int isnull;
2805
2806 GET_MEMNUM_INC(mem, p); /* mem: null check id */
2807# ifdef USE_MONOMANIAC_CHECK_CAPTURES_IN_ENDLESS_REPEAT
2808 STACK_NULL_CHECK_MEMST_REC(isnull, mem, s, reg);
2809# else
2810 STACK_NULL_CHECK_REC(isnull, mem, s);
2811# endif
2812 if (isnull) {
2813# ifdef ONIG_DEBUG_MATCH
2814 fprintf(stderr, "NULL_CHECK_END_MEMST_PUSH: skip id:%d, s:%"PRIuPTR" (%p)\n",
2815 (int )mem, (uintptr_t )s, s);
2816# endif
2817 if (isnull == -1) goto fail;
2818 goto null_check_found;
2819 }
2820 else {
2821 STACK_PUSH_NULL_CHECK_END(mem);
2822 }
2823 }
2824 MOP_OUT;
2825 JUMP;
2826#endif
2827
2828 CASE(OP_JUMP) MOP_IN(OP_JUMP);
2829 GET_RELADDR_INC(addr, p);
2830 p += addr;
2831 MOP_OUT;
2832 CHECK_INTERRUPT_IN_MATCH_AT;
2833 JUMP;
2834
2835 CASE(OP_PUSH) MOP_IN(OP_PUSH);
2836 GET_RELADDR_INC(addr, p);
2837 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2838 MOP_OUT;
2839 JUMP;
2840
2841#ifdef USE_COMBINATION_EXPLOSION_CHECK
2842 CASE(OP_STATE_CHECK_PUSH) MOP_IN(OP_STATE_CHECK_PUSH);
2843 GET_STATE_CHECK_NUM_INC(mem, p);
2844 STATE_CHECK_VAL(scv, mem);
2845 if (scv) goto fail;
2846
2847 GET_RELADDR_INC(addr, p);
2848 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2849 MOP_OUT;
2850 JUMP;
2851
2852 CASE(OP_STATE_CHECK_PUSH_OR_JUMP) MOP_IN(OP_STATE_CHECK_PUSH_OR_JUMP);
2853 GET_STATE_CHECK_NUM_INC(mem, p);
2854 GET_RELADDR_INC(addr, p);
2855 STATE_CHECK_VAL(scv, mem);
2856 if (scv) {
2857 p += addr;
2858 }
2859 else {
2860 STACK_PUSH_ALT_WITH_STATE_CHECK(p + addr, s, sprev, mem, pkeep);
2861 }
2862 MOP_OUT;
2863 JUMP;
2864
2865 CASE(OP_STATE_CHECK) MOP_IN(OP_STATE_CHECK);
2866 GET_STATE_CHECK_NUM_INC(mem, p);
2867 STATE_CHECK_VAL(scv, mem);
2868 if (scv) goto fail;
2869
2870 STACK_PUSH_STATE_CHECK(s, mem);
2871 MOP_OUT;
2872 JUMP;
2873#endif /* USE_COMBINATION_EXPLOSION_CHECK */
2874
2875 CASE(OP_POP) MOP_IN(OP_POP);
2876 STACK_POP_ONE;
2877 MOP_OUT;
2878 JUMP;
2879
2880#ifdef USE_OP_PUSH_OR_JUMP_EXACT
2881 CASE(OP_PUSH_OR_JUMP_EXACT1) MOP_IN(OP_PUSH_OR_JUMP_EXACT1);
2882 GET_RELADDR_INC(addr, p);
2883 if (*p == *s && DATA_ENSURE_CHECK1) {
2884 p++;
2885 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2886 MOP_OUT;
2887 JUMP;
2888 }
2889 p += (addr + 1);
2890 MOP_OUT;
2891 JUMP;
2892#endif
2893
2894 CASE(OP_PUSH_IF_PEEK_NEXT) MOP_IN(OP_PUSH_IF_PEEK_NEXT);
2895 GET_RELADDR_INC(addr, p);
2896 if (*p == *s) {
2897 p++;
2898 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2899 MOP_OUT;
2900 JUMP;
2901 }
2902 p++;
2903 MOP_OUT;
2904 JUMP;
2905
2906 CASE(OP_REPEAT) MOP_IN(OP_REPEAT);
2907 {
2908 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2909 GET_RELADDR_INC(addr, p);
2910
2911 STACK_ENSURE(1);
2912 repeat_stk[mem] = GET_STACK_INDEX(stk);
2913 STACK_PUSH_REPEAT(mem, p);
2914
2915 if (reg->repeat_range[mem].lower == 0) {
2916 STACK_PUSH_ALT(p + addr, s, sprev, pkeep);
2917 }
2918 }
2919 MOP_OUT;
2920 JUMP;
2921
2922 CASE(OP_REPEAT_NG) MOP_IN(OP_REPEAT_NG);
2923 {
2924 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2925 GET_RELADDR_INC(addr, p);
2926
2927 STACK_ENSURE(1);
2928 repeat_stk[mem] = GET_STACK_INDEX(stk);
2929 STACK_PUSH_REPEAT(mem, p);
2930
2931 if (reg->repeat_range[mem].lower == 0) {
2932 STACK_PUSH_ALT(p, s, sprev, pkeep);
2933 p += addr;
2934 }
2935 }
2936 MOP_OUT;
2937 JUMP;
2938
2939 CASE(OP_REPEAT_INC) MOP_IN(OP_REPEAT_INC);
2940 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2941 si = repeat_stk[mem];
2942 stkp = STACK_AT(si);
2943
2944 repeat_inc:
2945 stkp->u.repeat.count++;
2946 if (stkp->u.repeat.count >= reg->repeat_range[mem].upper) {
2947 /* end of repeat. Nothing to do. */
2948 }
2949 else if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2950 STACK_PUSH_ALT(p, s, sprev, pkeep);
2951 p = STACK_AT(si)->u.repeat.pcode; /* Don't use stkp after PUSH. */
2952 }
2953 else {
2954 p = stkp->u.repeat.pcode;
2955 }
2956 STACK_PUSH_REPEAT_INC(si);
2957 MOP_OUT;
2958 CHECK_INTERRUPT_IN_MATCH_AT;
2959 JUMP;
2960
2961 CASE(OP_REPEAT_INC_SG) MOP_IN(OP_REPEAT_INC_SG);
2962 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2963 STACK_GET_REPEAT(mem, stkp);
2964 si = GET_STACK_INDEX(stkp);
2965 goto repeat_inc;
2966 NEXT;
2967
2968 CASE(OP_REPEAT_INC_NG) MOP_IN(OP_REPEAT_INC_NG);
2969 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2970 si = repeat_stk[mem];
2971 stkp = STACK_AT(si);
2972
2973 repeat_inc_ng:
2974 stkp->u.repeat.count++;
2975 if (stkp->u.repeat.count < reg->repeat_range[mem].upper) {
2976 if (stkp->u.repeat.count >= reg->repeat_range[mem].lower) {
2977 UChar* pcode = stkp->u.repeat.pcode;
2978
2979 STACK_PUSH_REPEAT_INC(si);
2980 STACK_PUSH_ALT(pcode, s, sprev, pkeep);
2981 }
2982 else {
2983 p = stkp->u.repeat.pcode;
2984 STACK_PUSH_REPEAT_INC(si);
2985 }
2986 }
2987 else if (stkp->u.repeat.count == reg->repeat_range[mem].upper) {
2988 STACK_PUSH_REPEAT_INC(si);
2989 }
2990 MOP_OUT;
2991 CHECK_INTERRUPT_IN_MATCH_AT;
2992 JUMP;
2993
2994 CASE(OP_REPEAT_INC_NG_SG) MOP_IN(OP_REPEAT_INC_NG_SG);
2995 GET_MEMNUM_INC(mem, p); /* mem: OP_REPEAT ID */
2996 STACK_GET_REPEAT(mem, stkp);
2997 si = GET_STACK_INDEX(stkp);
2998 goto repeat_inc_ng;
2999 NEXT;
3000
3001 CASE(OP_PUSH_POS) MOP_IN(OP_PUSH_POS);
3002 STACK_PUSH_POS(s, sprev, pkeep);
3003 MOP_OUT;
3004 JUMP;
3005
3006 CASE(OP_POP_POS) MOP_IN(OP_POP_POS);
3007 {
3008 STACK_POS_END(stkp);
3009 s = stkp->u.state.pstr;
3010 sprev = stkp->u.state.pstr_prev;
3011 }
3012 MOP_OUT;
3013 JUMP;
3014
3015 CASE(OP_PUSH_POS_NOT) MOP_IN(OP_PUSH_POS_NOT);
3016 GET_RELADDR_INC(addr, p);
3017 STACK_PUSH_POS_NOT(p + addr, s, sprev, pkeep);
3018 MOP_OUT;
3019 JUMP;
3020
3021 CASE(OP_FAIL_POS) MOP_IN(OP_FAIL_POS);
3022 STACK_POP_TIL_POS_NOT;
3023 goto fail;
3024 NEXT;
3025
3026 CASE(OP_PUSH_STOP_BT) MOP_IN(OP_PUSH_STOP_BT);
3027 STACK_PUSH_STOP_BT;
3028 MOP_OUT;
3029 JUMP;
3030
3031 CASE(OP_POP_STOP_BT) MOP_IN(OP_POP_STOP_BT);
3032 STACK_STOP_BT_END;
3033 MOP_OUT;
3034 JUMP;
3035
3036 CASE(OP_LOOK_BEHIND) MOP_IN(OP_LOOK_BEHIND);
3037 GET_LENGTH_INC(tlen, p);
3038 s = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3039 if (IS_NULL(s)) goto fail;
3040 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3041 MOP_OUT;
3042 JUMP;
3043
3044 CASE(OP_PUSH_LOOK_BEHIND_NOT) MOP_IN(OP_PUSH_LOOK_BEHIND_NOT);
3045 GET_RELADDR_INC(addr, p);
3046 GET_LENGTH_INC(tlen, p);
3047 q = (UChar* )ONIGENC_STEP_BACK(encode, str, s, end, (int )tlen);
3048 if (IS_NULL(q)) {
3049 /* too short case -> success. ex. /(?<!XXX)a/.match("a")
3050 If you want to change to fail, replace following line. */
3051 p += addr;
3052 /* goto fail; */
3053 }
3054 else {
3055 STACK_PUSH_LOOK_BEHIND_NOT(p + addr, s, sprev, pkeep);
3056 s = q;
3057 sprev = (UChar* )onigenc_get_prev_char_head(encode, str, s, end);
3058 }
3059 MOP_OUT;
3060 JUMP;
3061
3062 CASE(OP_FAIL_LOOK_BEHIND_NOT) MOP_IN(OP_FAIL_LOOK_BEHIND_NOT);
3063 STACK_POP_TIL_LOOK_BEHIND_NOT;
3064 goto fail;
3065 NEXT;
3066
3067 CASE(OP_PUSH_ABSENT_POS) MOP_IN(OP_PUSH_ABSENT_POS);
3068 /* Save the absent-start-pos and the original end-pos. */
3069 STACK_PUSH_ABSENT_POS(s, ABSENT_END_POS);
3070 MOP_OUT;
3071 JUMP;
3072
3073 CASE(OP_ABSENT) MOP_IN(OP_ABSENT);
3074 {
3075 const UChar* aend = ABSENT_END_POS;
3076 UChar* absent;
3077 UChar* selfp = p - 1;
3078
3079 STACK_POP_ABSENT_POS(absent, ABSENT_END_POS); /* Restore end-pos. */
3080 GET_RELADDR_INC(addr, p);
3081#ifdef ONIG_DEBUG_MATCH
3082 fprintf(stderr, "ABSENT: s:%p, end:%p, absent:%p, aend:%p\n", s, end, absent, aend);
3083#endif
3084 if ((absent > aend) && (s > absent)) {
3085 /* An empty match occurred in (?~...) at the start point.
3086 * Never match. */
3087 STACK_POP;
3088 goto fail;
3089 }
3090 else if ((s >= aend) && (s > absent)) {
3091 if (s > aend) {
3092 /* Only one (or less) character matched in the last iteration.
3093 * This is not a possible point. */
3094 goto fail;
3095 }
3096 /* All possible points were found. Try matching after (?~...). */
3097 DATA_ENSURE(0);
3098 p += addr;
3099 }
3100 else {
3101 STACK_PUSH_ALT(p + addr, s, sprev, pkeep); /* Push possible point. */
3102 n = enclen(encode, s, end);
3103 STACK_PUSH_ABSENT_POS(absent, ABSENT_END_POS); /* Save the original pos. */
3104 STACK_PUSH_ALT(selfp, s + n, s, pkeep); /* Next iteration. */
3105 STACK_PUSH_ABSENT;
3106 ABSENT_END_POS = aend;
3107 }
3108 }
3109 MOP_OUT;
3110 JUMP;
3111
3112 CASE(OP_ABSENT_END) MOP_IN(OP_ABSENT_END);
3113 /* The pattern inside (?~...) was matched.
3114 * Set the end-pos temporary and go to next iteration. */
3115 if (sprev < ABSENT_END_POS)
3116 ABSENT_END_POS = sprev;
3117#ifdef ONIG_DEBUG_MATCH
3118 fprintf(stderr, "ABSENT_END: end:%p\n", ABSENT_END_POS);
3119#endif
3120 STACK_POP_TIL_ABSENT;
3121 goto fail;
3122 NEXT;
3123
3124#ifdef USE_SUBEXP_CALL
3125 CASE(OP_CALL) MOP_IN(OP_CALL);
3126 GET_ABSADDR_INC(addr, p);
3127 STACK_PUSH_CALL_FRAME(p);
3128 p = reg->p + addr;
3129 MOP_OUT;
3130 JUMP;
3131
3132 CASE(OP_RETURN) MOP_IN(OP_RETURN);
3133 STACK_RETURN(p);
3134 STACK_PUSH_RETURN;
3135 MOP_OUT;
3136 JUMP;
3137#endif
3138
3139 CASE(OP_CONDITION) MOP_IN(OP_CONDITION);
3140 GET_MEMNUM_INC(mem, p);
3141 GET_RELADDR_INC(addr, p);
3142 if ((mem > num_mem) ||
3143 (mem_end_stk[mem] == INVALID_STACK_INDEX) ||
3144 (mem_start_stk[mem] == INVALID_STACK_INDEX)) {
3145 p += addr;
3146 }
3147 MOP_OUT;
3148 JUMP;
3149
3150 CASE(OP_FINISH)
3151 goto finish;
3152 NEXT;
3153
3154 CASE(OP_FAIL)
3155 if (0) {
3156 /* fall */
3157 fail:
3158 MOP_OUT;
3159 }
3160 MOP_IN(OP_FAIL);
3161 STACK_POP;
3162 p = stk->u.state.pcode;
3163 s = stk->u.state.pstr;
3164 sprev = stk->u.state.pstr_prev;
3165 pkeep = stk->u.state.pkeep;
3166
3167#ifdef USE_COMBINATION_EXPLOSION_CHECK
3168 if (stk->u.state.state_check != 0) {
3169 stk->type = STK_STATE_CHECK_MARK;
3170 stk++;
3171 }
3172#endif
3173
3174 MOP_OUT;
3175 JUMP;
3176
3177 DEFAULT
3178 goto bytecode_error;
3179 } VM_LOOP_END
3180
3181 finish:
3182 STACK_SAVE;
3183 if (xmalloc_base) xfree(xmalloc_base);
3184 return best_len;
3185
3186#ifdef ONIG_DEBUG
3187 stack_error:
3188 STACK_SAVE;
3189 if (xmalloc_base) xfree(xmalloc_base);
3190 return ONIGERR_STACK_BUG;
3191#endif
3192
3193 bytecode_error:
3194 STACK_SAVE;
3195 if (xmalloc_base) xfree(xmalloc_base);
3196 return ONIGERR_UNDEFINED_BYTECODE;
3197
3198 unexpected_bytecode_error:
3199 STACK_SAVE;
3200 if (xmalloc_base) xfree(xmalloc_base);
3201 return ONIGERR_UNEXPECTED_BYTECODE;
3202}
3203
3204
3205static UChar*
3206slow_search(OnigEncoding enc, UChar* target, UChar* target_end,
3207 const UChar* text, const UChar* text_end, UChar* text_range)
3208{
3209 UChar *t, *p, *s, *end;
3210
3211 end = (UChar* )text_end;
3212 end -= target_end - target - 1;
3213 if (end > text_range)
3214 end = text_range;
3215
3216 s = (UChar* )text;
3217
3218 if (enc->max_enc_len == enc->min_enc_len) {
3219 int n = enc->max_enc_len;
3220
3221 while (s < end) {
3222 if (*s == *target) {
3223 p = s + 1;
3224 t = target + 1;
3225 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3226 return s;
3227 }
3228 s += n;
3229 }
3230 return (UChar* )NULL;
3231 }
3232 while (s < end) {
3233 if (*s == *target) {
3234 p = s + 1;
3235 t = target + 1;
3236 if (target_end == t || memcmp(t, p, target_end - t) == 0)
3237 return s;
3238 }
3239 s += enclen(enc, s, text_end);
3240 }
3241
3242 return (UChar* )NULL;
3243}
3244
3245static int
3246str_lower_case_match(OnigEncoding enc, int case_fold_flag,
3247 const UChar* t, const UChar* tend,
3248 const UChar* p, const UChar* end)
3249{
3250 int lowlen;
3251 UChar *q, lowbuf[ONIGENC_MBC_CASE_FOLD_MAXLEN];
3252
3253 while (t < tend) {
3254 lowlen = ONIGENC_MBC_CASE_FOLD(enc, case_fold_flag, &p, end, lowbuf);
3255 q = lowbuf;
3256 while (lowlen > 0) {
3257 if (*t++ != *q++) return 0;
3258 lowlen--;
3259 }
3260 }
3261
3262 return 1;
3263}
3264
3265static UChar*
3266slow_search_ic(OnigEncoding enc, int case_fold_flag,
3267 UChar* target, UChar* target_end,
3268 const UChar* text, const UChar* text_end, UChar* text_range)
3269{
3270 UChar *s, *end;
3271
3272 end = (UChar* )text_end;
3273 end -= target_end - target - 1;
3274 if (end > text_range)
3275 end = text_range;
3276
3277 s = (UChar* )text;
3278
3279 while (s < end) {
3280 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3281 s, text_end))
3282 return s;
3283
3284 s += enclen(enc, s, text_end);
3285 }
3286
3287 return (UChar* )NULL;
3288}
3289
3290static UChar*
3291slow_search_backward(OnigEncoding enc, UChar* target, UChar* target_end,
3292 const UChar* text, const UChar* adjust_text,
3293 const UChar* text_end, const UChar* text_start)
3294{
3295 UChar *t, *p, *s;
3296
3297 s = (UChar* )text_end;
3298 s -= (target_end - target);
3299 if (s > text_start)
3300 s = (UChar* )text_start;
3301 else
3302 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3303
3304 while (s >= text) {
3305 if (*s == *target) {
3306 p = s + 1;
3307 t = target + 1;
3308 while (t < target_end) {
3309 if (*t != *p++)
3310 break;
3311 t++;
3312 }
3313 if (t == target_end)
3314 return s;
3315 }
3316 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3317 }
3318
3319 return (UChar* )NULL;
3320}
3321
3322static UChar*
3323slow_search_backward_ic(OnigEncoding enc, int case_fold_flag,
3324 UChar* target, UChar* target_end,
3325 const UChar* text, const UChar* adjust_text,
3326 const UChar* text_end, const UChar* text_start)
3327{
3328 UChar *s;
3329
3330 s = (UChar* )text_end;
3331 s -= (target_end - target);
3332 if (s > text_start)
3333 s = (UChar* )text_start;
3334 else
3335 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, adjust_text, s, text_end);
3336
3337 while (s >= text) {
3338 if (str_lower_case_match(enc, case_fold_flag,
3339 target, target_end, s, text_end))
3340 return s;
3341
3342 s = (UChar* )onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3343 }
3344
3345 return (UChar* )NULL;
3346}
3347
3348#ifndef USE_SUNDAY_QUICK_SEARCH
3349/* Boyer-Moore-Horspool search applied to a multibyte string */
3350static UChar*
3351bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3352 const UChar* text, const UChar* text_end,
3353 const UChar* text_range)
3354{
3355 const UChar *s, *se, *t, *p, *end;
3356 const UChar *tail;
3357 ptrdiff_t skip, tlen1;
3358
3359# ifdef ONIG_DEBUG_SEARCH
3360 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3361 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3362# endif
3363
3364 tail = target_end - 1;
3365 tlen1 = tail - target;
3366 end = text_range;
3367 if (end + tlen1 > text_end)
3368 end = text_end - tlen1;
3369
3370 s = text;
3371
3372 if (IS_NULL(reg->int_map)) {
3373 while (s < end) {
3374 p = se = s + tlen1;
3375 t = tail;
3376 while (*p == *t) {
3377 if (t == target) return (UChar* )s;
3378 p--; t--;
3379 }
3380 skip = reg->map[*se];
3381 t = s;
3382 do {
3383 s += enclen(reg->enc, s, end);
3384 } while ((s - t) < skip && s < end);
3385 }
3386 }
3387 else {
3388# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3389 while (s < end) {
3390 p = se = s + tlen1;
3391 t = tail;
3392 while (*p == *t) {
3393 if (t == target) return (UChar* )s;
3394 p--; t--;
3395 }
3396 skip = reg->int_map[*se];
3397 t = s;
3398 do {
3399 s += enclen(reg->enc, s, end);
3400 } while ((s - t) < skip && s < end);
3401 }
3402# endif
3403 }
3404
3405 return (UChar* )NULL;
3406}
3407
3408/* Boyer-Moore-Horspool search */
3409static UChar*
3410bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3411 const UChar* text, const UChar* text_end, const UChar* text_range)
3412{
3413 const UChar *s, *t, *p, *end;
3414 const UChar *tail;
3415
3416# ifdef ONIG_DEBUG_SEARCH
3417 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3418 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3419# endif
3420
3421 end = text_range + (target_end - target) - 1;
3422 if (end > text_end)
3423 end = text_end;
3424
3425 tail = target_end - 1;
3426 s = text + (target_end - target) - 1;
3427 if (IS_NULL(reg->int_map)) {
3428 while (s < end) {
3429 p = s;
3430 t = tail;
3431# ifdef ONIG_DEBUG_SEARCH
3432 fprintf(stderr, "bm_search_loop: pos: %"PRIdPTR" %s\n",
3433 (intptr_t )(s - text), s);
3434# endif
3435 while (*p == *t) {
3436 if (t == target) return (UChar* )p;
3437 p--; t--;
3438 }
3439 s += reg->map[*s];
3440 }
3441 }
3442 else { /* see int_map[] */
3443# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3444 while (s < end) {
3445 p = s;
3446 t = tail;
3447 while (*p == *t) {
3448 if (t == target) return (UChar* )p;
3449 p--; t--;
3450 }
3451 s += reg->int_map[*s];
3452 }
3453# endif
3454 }
3455 return (UChar* )NULL;
3456}
3457
3458/* Boyer-Moore-Horspool search applied to a multibyte string (ignore case) */
3459static UChar*
3460bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3461 const UChar* text, const UChar* text_end,
3462 const UChar* text_range)
3463{
3464 const UChar *s, *se, *t, *end;
3465 const UChar *tail;
3466 ptrdiff_t skip, tlen1;
3467 OnigEncoding enc = reg->enc;
3468 int case_fold_flag = reg->case_fold_flag;
3469
3470# ifdef ONIG_DEBUG_SEARCH
3471 fprintf(stderr, "bm_search_notrev_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3472 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3473# endif
3474
3475 tail = target_end - 1;
3476 tlen1 = tail - target;
3477 end = text_range;
3478 if (end + tlen1 > text_end)
3479 end = text_end - tlen1;
3480
3481 s = text;
3482
3483 if (IS_NULL(reg->int_map)) {
3484 while (s < end) {
3485 se = s + tlen1;
3486 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3487 s, se + 1))
3488 return (UChar* )s;
3489 skip = reg->map[*se];
3490 t = s;
3491 do {
3492 s += enclen(reg->enc, s, end);
3493 } while ((s - t) < skip && s < end);
3494 }
3495 }
3496 else {
3497# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3498 while (s < end) {
3499 se = s + tlen1;
3500 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3501 s, se + 1))
3502 return (UChar* )s;
3503 skip = reg->int_map[*se];
3504 t = s;
3505 do {
3506 s += enclen(reg->enc, s, end);
3507 } while ((s - t) < skip && s < end);
3508 }
3509# endif
3510 }
3511
3512 return (UChar* )NULL;
3513}
3514
3515/* Boyer-Moore-Horspool search (ignore case) */
3516static UChar*
3517bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3518 const UChar* text, const UChar* text_end, const UChar* text_range)
3519{
3520 const UChar *s, *p, *end;
3521 const UChar *tail;
3522 OnigEncoding enc = reg->enc;
3523 int case_fold_flag = reg->case_fold_flag;
3524
3525# ifdef ONIG_DEBUG_SEARCH
3526 fprintf(stderr, "bm_search_ic: text: %d (%p), text_end: %d (%p), text_range: %d (%p)\n",
3527 (int )text, text, (int )text_end, text_end, (int )text_range, text_range);
3528# endif
3529
3530 end = text_range + (target_end - target) - 1;
3531 if (end > text_end)
3532 end = text_end;
3533
3534 tail = target_end - 1;
3535 s = text + (target_end - target) - 1;
3536 if (IS_NULL(reg->int_map)) {
3537 while (s < end) {
3538 p = s - (target_end - target) + 1;
3539 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3540 p, s + 1))
3541 return (UChar* )p;
3542 s += reg->map[*s];
3543 }
3544 }
3545 else { /* see int_map[] */
3546# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3547 while (s < end) {
3548 p = s - (target_end - target) + 1;
3549 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3550 p, s + 1))
3551 return (UChar* )p;
3552 s += reg->int_map[*s];
3553 }
3554# endif
3555 }
3556 return (UChar* )NULL;
3557}
3558
3559#else /* USE_SUNDAY_QUICK_SEARCH */
3560
3561/* Sunday's quick search applied to a multibyte string */
3562static UChar*
3563bm_search_notrev(regex_t* reg, const UChar* target, const UChar* target_end,
3564 const UChar* text, const UChar* text_end,
3565 const UChar* text_range)
3566{
3567 const UChar *s, *se, *t, *p, *end;
3568 const UChar *tail;
3569 ptrdiff_t skip, tlen1;
3570 OnigEncoding enc = reg->enc;
3571
3572# ifdef ONIG_DEBUG_SEARCH
3573 fprintf(stderr, "bm_search_notrev: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3574 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3575# endif
3576
3577 tail = target_end - 1;
3578 tlen1 = tail - target;
3579 end = text_range;
3580 if (end + tlen1 > text_end)
3581 end = text_end - tlen1;
3582
3583 s = text;
3584
3585 if (IS_NULL(reg->int_map)) {
3586 while (s < end) {
3587 p = se = s + tlen1;
3588 t = tail;
3589 while (*p == *t) {
3590 if (t == target) return (UChar* )s;
3591 p--; t--;
3592 }
3593 if (s + 1 >= end) break;
3594 skip = reg->map[se[1]];
3595 t = s;
3596 do {
3597 s += enclen(enc, s, end);
3598 } while ((s - t) < skip && s < end);
3599 }
3600 }
3601 else {
3602# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3603 while (s < end) {
3604 p = se = s + tlen1;
3605 t = tail;
3606 while (*p == *t) {
3607 if (t == target) return (UChar* )s;
3608 p--; t--;
3609 }
3610 if (s + 1 >= end) break;
3611 skip = reg->int_map[se[1]];
3612 t = s;
3613 do {
3614 s += enclen(enc, s, end);
3615 } while ((s - t) < skip && s < end);
3616 }
3617# endif
3618 }
3619
3620 return (UChar* )NULL;
3621}
3622
3623/* Sunday's quick search */
3624static UChar*
3625bm_search(regex_t* reg, const UChar* target, const UChar* target_end,
3626 const UChar* text, const UChar* text_end, const UChar* text_range)
3627{
3628 const UChar *s, *t, *p, *end;
3629 const UChar *tail;
3630 ptrdiff_t tlen1;
3631
3632# ifdef ONIG_DEBUG_SEARCH
3633 fprintf(stderr, "bm_search: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3634 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3635# endif
3636
3637 tail = target_end - 1;
3638 tlen1 = tail - target;
3639 end = text_range + tlen1;
3640 if (end > text_end)
3641 end = text_end;
3642
3643 s = text + tlen1;
3644 if (IS_NULL(reg->int_map)) {
3645 while (s < end) {
3646 p = s;
3647 t = tail;
3648 while (*p == *t) {
3649 if (t == target) return (UChar* )p;
3650 p--; t--;
3651 }
3652 if (s + 1 >= end) break;
3653 s += reg->map[s[1]];
3654 }
3655 }
3656 else { /* see int_map[] */
3657# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3658 while (s < end) {
3659 p = s;
3660 t = tail;
3661 while (*p == *t) {
3662 if (t == target) return (UChar* )p;
3663 p--; t--;
3664 }
3665 if (s + 1 >= end) break;
3666 s += reg->int_map[s[1]];
3667 }
3668# endif
3669 }
3670 return (UChar* )NULL;
3671}
3672
3673/* Sunday's quick search applied to a multibyte string (ignore case) */
3674static UChar*
3675bm_search_notrev_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3676 const UChar* text, const UChar* text_end,
3677 const UChar* text_range)
3678{
3679 const UChar *s, *se, *t, *end;
3680 const UChar *tail;
3681 ptrdiff_t skip, tlen1;
3682 OnigEncoding enc = reg->enc;
3683 int case_fold_flag = reg->case_fold_flag;
3684
3685# ifdef ONIG_DEBUG_SEARCH
3686 fprintf(stderr, "bm_search_notrev_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3687 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3688# endif
3689
3690 tail = target_end - 1;
3691 tlen1 = tail - target;
3692 end = text_range;
3693 if (end + tlen1 > text_end)
3694 end = text_end - tlen1;
3695
3696 s = text;
3697
3698 if (IS_NULL(reg->int_map)) {
3699 while (s < end) {
3700 se = s + tlen1;
3701 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3702 s, se + 1))
3703 return (UChar* )s;
3704 if (s + 1 >= end) break;
3705 skip = reg->map[se[1]];
3706 t = s;
3707 do {
3708 s += enclen(enc, s, end);
3709 } while ((s - t) < skip && s < end);
3710 }
3711 }
3712 else {
3713# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3714 while (s < end) {
3715 se = s + tlen1;
3716 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3717 s, se + 1))
3718 return (UChar* )s;
3719 if (s + 1 >= end) break;
3720 skip = reg->int_map[se[1]];
3721 t = s;
3722 do {
3723 s += enclen(enc, s, end);
3724 } while ((s - t) < skip && s < end);
3725 }
3726# endif
3727 }
3728
3729 return (UChar* )NULL;
3730}
3731
3732/* Sunday's quick search (ignore case) */
3733static UChar*
3734bm_search_ic(regex_t* reg, const UChar* target, const UChar* target_end,
3735 const UChar* text, const UChar* text_end, const UChar* text_range)
3736{
3737 const UChar *s, *p, *end;
3738 const UChar *tail;
3739 ptrdiff_t tlen1;
3740 OnigEncoding enc = reg->enc;
3741 int case_fold_flag = reg->case_fold_flag;
3742
3743# ifdef ONIG_DEBUG_SEARCH
3744 fprintf(stderr, "bm_search_ic: text: %"PRIuPTR" (%p), text_end: %"PRIuPTR" (%p), text_range: %"PRIuPTR" (%p)\n",
3745 (uintptr_t )text, text, (uintptr_t )text_end, text_end, (uintptr_t )text_range, text_range);
3746# endif
3747
3748 tail = target_end - 1;
3749 tlen1 = tail - target;
3750 end = text_range + tlen1;
3751 if (end > text_end)
3752 end = text_end;
3753
3754 s = text + tlen1;
3755 if (IS_NULL(reg->int_map)) {
3756 while (s < end) {
3757 p = s - tlen1;
3758 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3759 p, s + 1))
3760 return (UChar* )p;
3761 if (s + 1 >= end) break;
3762 s += reg->map[s[1]];
3763 }
3764 }
3765 else { /* see int_map[] */
3766# if OPT_EXACT_MAXLEN >= ONIG_CHAR_TABLE_SIZE
3767 while (s < end) {
3768 p = s - tlen1;
3769 if (str_lower_case_match(enc, case_fold_flag, target, target_end,
3770 p, s + 1))
3771 return (UChar* )p;
3772 if (s + 1 >= end) break;
3773 s += reg->int_map[s[1]];
3774 }
3775# endif
3776 }
3777 return (UChar* )NULL;
3778}
3779#endif /* USE_SUNDAY_QUICK_SEARCH */
3780
3781#ifdef USE_INT_MAP_BACKWARD
3782static int
3783set_bm_backward_skip(UChar* s, UChar* end, OnigEncoding enc ARG_UNUSED,
3784 int** skip)
3785{
3786 int i, len;
3787
3788 if (IS_NULL(*skip)) {
3789 *skip = (int* )xmalloc(sizeof(int) * ONIG_CHAR_TABLE_SIZE);
3790 if (IS_NULL(*skip)) return ONIGERR_MEMORY;
3791 }
3792
3793 len = (int )(end - s);
3794 for (i = 0; i < ONIG_CHAR_TABLE_SIZE; i++)
3795 (*skip)[i] = len;
3796
3797 for (i = len - 1; i > 0; i--)
3798 (*skip)[s[i]] = i;
3799
3800 return 0;
3801}
3802
3803static UChar*
3804bm_search_backward(regex_t* reg, const UChar* target, const UChar* target_end,
3805 const UChar* text, const UChar* adjust_text,
3806 const UChar* text_end, const UChar* text_start)
3807{
3808 const UChar *s, *t, *p;
3809
3810 s = text_end - (target_end - target);
3811 if (text_start < s)
3812 s = text_start;
3813 else
3814 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3815
3816 while (s >= text) {
3817 p = s;
3818 t = target;
3819 while (t < target_end && *p == *t) {
3820 p++; t++;
3821 }
3822 if (t == target_end)
3823 return (UChar* )s;
3824
3825 s -= reg->int_map_backward[*s];
3826 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, adjust_text, s, text_end);
3827 }
3828
3829 return (UChar* )NULL;
3830}
3831#endif
3832
3833static UChar*
3834map_search(OnigEncoding enc, UChar map[],
3835 const UChar* text, const UChar* text_range, const UChar* text_end)
3836{
3837 const UChar *s = text;
3838
3839 while (s < text_range) {
3840 if (map[*s]) return (UChar* )s;
3841
3842 s += enclen(enc, s, text_end);
3843 }
3844 return (UChar* )NULL;
3845}
3846
3847static UChar*
3848map_search_backward(OnigEncoding enc, UChar map[],
3849 const UChar* text, const UChar* adjust_text,
3850 const UChar* text_start, const UChar* text_end)
3851{
3852 const UChar *s = text_start;
3853
3854 while (s >= text) {
3855 if (map[*s]) return (UChar* )s;
3856
3857 s = onigenc_get_prev_char_head(enc, adjust_text, s, text_end);
3858 }
3859 return (UChar* )NULL;
3860}
3861
3862extern OnigPosition
3863onig_match(regex_t* reg, const UChar* str, const UChar* end, const UChar* at, OnigRegion* region,
3864 OnigOptionType option)
3865{
3866 ptrdiff_t r;
3867 UChar *prev;
3868 OnigMatchArg msa;
3869
3870 MATCH_ARG_INIT(msa, option, region, at, at);
3871#ifdef USE_COMBINATION_EXPLOSION_CHECK
3872 {
3873 int offset = at - str;
3874 STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
3875 }
3876#endif
3877
3878 if (region) {
3879 r = onig_region_resize_clear(region, reg->num_mem + 1);
3880 }
3881 else
3882 r = 0;
3883
3884 if (r == 0) {
3885 prev = (UChar* )onigenc_get_prev_char_head(reg->enc, str, at, end);
3886 r = match_at(reg, str, end,
3887#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
3888 end,
3889#endif
3890 at, prev, &msa);
3891 }
3892
3893 MATCH_ARG_FREE(msa);
3894 return r;
3895}
3896
3897static int
3898forward_search_range(regex_t* reg, const UChar* str, const UChar* end, UChar* s,
3899 UChar* range, UChar** low, UChar** high, UChar** low_prev)
3900{
3901 UChar *p, *pprev = (UChar* )NULL;
3902
3903#ifdef ONIG_DEBUG_SEARCH
3904 fprintf(stderr, "forward_search_range: str: %"PRIuPTR" (%p), end: %"PRIuPTR" (%p), s: %"PRIuPTR" (%p), range: %"PRIuPTR" (%p)\n",
3905 (uintptr_t )str, str, (uintptr_t )end, end, (uintptr_t )s, s, (uintptr_t )range, range);
3906#endif
3907
3908 p = s;
3909 if (reg->dmin > 0) {
3910 if (ONIGENC_IS_SINGLEBYTE(reg->enc)) {
3911 p += reg->dmin;
3912 }
3913 else {
3914 UChar *q = p + reg->dmin;
3915
3916 if (q >= end) return 0; /* fail */
3917 while (p < q) p += enclen(reg->enc, p, end);
3918 }
3919 }
3920
3921 retry:
3922 switch (reg->optimize) {
3923 case ONIG_OPTIMIZE_EXACT:
3924 p = slow_search(reg->enc, reg->exact, reg->exact_end, p, end, range);
3925 break;
3926 case ONIG_OPTIMIZE_EXACT_IC:
3927 p = slow_search_ic(reg->enc, reg->case_fold_flag,
3928 reg->exact, reg->exact_end, p, end, range);
3929 break;
3930
3931 case ONIG_OPTIMIZE_EXACT_BM:
3932 p = bm_search(reg, reg->exact, reg->exact_end, p, end, range);
3933 break;
3934
3935 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
3936 p = bm_search_notrev(reg, reg->exact, reg->exact_end, p, end, range);
3937 break;
3938
3939 case ONIG_OPTIMIZE_EXACT_BM_IC:
3940 p = bm_search_ic(reg, reg->exact, reg->exact_end, p, end, range);
3941 break;
3942
3943 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
3944 p = bm_search_notrev_ic(reg, reg->exact, reg->exact_end, p, end, range);
3945 break;
3946
3947 case ONIG_OPTIMIZE_MAP:
3948 p = map_search(reg->enc, reg->map, p, range, end);
3949 break;
3950 }
3951
3952 if (p && p < range) {
3953 if (p - reg->dmin < s) {
3954 retry_gate:
3955 pprev = p;
3956 p += enclen(reg->enc, p, end);
3957 goto retry;
3958 }
3959
3960 if (reg->sub_anchor) {
3961 UChar* prev;
3962
3963 switch (reg->sub_anchor) {
3964 case ANCHOR_BEGIN_LINE:
3965 if (!ON_STR_BEGIN(p)) {
3966 prev = onigenc_get_prev_char_head(reg->enc,
3967 (pprev ? pprev : str), p, end);
3968 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0))
3969 goto retry_gate;
3970 }
3971 break;
3972
3973 case ANCHOR_END_LINE:
3974 if (ON_STR_END(p)) {
3975#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
3976 prev = (UChar* )onigenc_get_prev_char_head(reg->enc,
3977 (pprev ? pprev : str), p);
3978 if (prev && ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1))
3979 goto retry_gate;
3980#endif
3981 }
3982 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1))
3983 goto retry_gate;
3984 break;
3985 }
3986 }
3987
3988 if (reg->dmax == 0) {
3989 *low = p;
3990 if (low_prev) {
3991 if (*low > s)
3992 *low_prev = onigenc_get_prev_char_head(reg->enc, s, p, end);
3993 else
3994 *low_prev = onigenc_get_prev_char_head(reg->enc,
3995 (pprev ? pprev : str), p, end);
3996 }
3997 }
3998 else {
3999 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4000 if (p < str + reg->dmax) {
4001 *low = (UChar* )str;
4002 if (low_prev)
4003 *low_prev = onigenc_get_prev_char_head(reg->enc, str, *low, end);
4004 }
4005 else {
4006 *low = p - reg->dmax;
4007 if (*low > s) {
4008 *low = onigenc_get_right_adjust_char_head_with_prev(reg->enc, s,
4009 *low, end, (const UChar** )low_prev);
4010 if (low_prev && IS_NULL(*low_prev))
4011 *low_prev = onigenc_get_prev_char_head(reg->enc,
4012 (pprev ? pprev : s), *low, end);
4013 }
4014 else {
4015 if (low_prev)
4016 *low_prev = onigenc_get_prev_char_head(reg->enc,
4017 (pprev ? pprev : str), *low, end);
4018 }
4019 }
4020 }
4021 }
4022 /* no needs to adjust *high, *high is used as range check only */
4023 *high = p - reg->dmin;
4024
4025#ifdef ONIG_DEBUG_SEARCH
4026 fprintf(stderr,
4027 "forward_search_range success: low: %"PRIdPTR", high: %"PRIdPTR", dmin: %"PRIdPTR", dmax: %"PRIdPTR"\n",
4028 *low - str, *high - str, reg->dmin, reg->dmax);
4029#endif
4030 return 1; /* success */
4031 }
4032
4033 return 0; /* fail */
4034}
4035
4036#define BM_BACKWARD_SEARCH_LENGTH_THRESHOLD 100
4037
4038static int
4039backward_search_range(regex_t* reg, const UChar* str, const UChar* end,
4040 UChar* s, const UChar* range, UChar* adjrange,
4041 UChar** low, UChar** high)
4042{
4043 UChar *p;
4044
4045 range += reg->dmin;
4046 p = s;
4047
4048 retry:
4049 switch (reg->optimize) {
4050 case ONIG_OPTIMIZE_EXACT:
4051 exact_method:
4052 p = slow_search_backward(reg->enc, reg->exact, reg->exact_end,
4053 range, adjrange, end, p);
4054 break;
4055
4056 case ONIG_OPTIMIZE_EXACT_IC:
4057 case ONIG_OPTIMIZE_EXACT_BM_IC:
4058 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV_IC:
4059 p = slow_search_backward_ic(reg->enc, reg->case_fold_flag,
4060 reg->exact, reg->exact_end,
4061 range, adjrange, end, p);
4062 break;
4063
4064 case ONIG_OPTIMIZE_EXACT_BM:
4065 case ONIG_OPTIMIZE_EXACT_BM_NOT_REV:
4066#ifdef USE_INT_MAP_BACKWARD
4067 if (IS_NULL(reg->int_map_backward)) {
4068 int r;
4069 if (s - range < BM_BACKWARD_SEARCH_LENGTH_THRESHOLD)
4070 goto exact_method;
4071
4072 r = set_bm_backward_skip(reg->exact, reg->exact_end, reg->enc,
4073 &(reg->int_map_backward));
4074 if (r) return r;
4075 }
4076 p = bm_search_backward(reg, reg->exact, reg->exact_end, range, adjrange,
4077 end, p);
4078#else
4079 goto exact_method;
4080#endif
4081 break;
4082
4083 case ONIG_OPTIMIZE_MAP:
4084 p = map_search_backward(reg->enc, reg->map, range, adjrange, p, end);
4085 break;
4086 }
4087
4088 if (p) {
4089 if (reg->sub_anchor) {
4090 UChar* prev;
4091
4092 switch (reg->sub_anchor) {
4093 case ANCHOR_BEGIN_LINE:
4094 if (!ON_STR_BEGIN(p)) {
4095 prev = onigenc_get_prev_char_head(reg->enc, str, p, end);
4096 if (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)) {
4097 p = prev;
4098 goto retry;
4099 }
4100 }
4101 break;
4102
4103 case ANCHOR_END_LINE:
4104 if (ON_STR_END(p)) {
4105#ifndef USE_NEWLINE_AT_END_OF_STRING_HAS_EMPTY_LINE
4106 prev = onigenc_get_prev_char_head(reg->enc, adjrange, p);
4107 if (IS_NULL(prev)) goto fail;
4108 if (ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 1)) {
4109 p = prev;
4110 goto retry;
4111 }
4112#endif
4113 }
4114 else if (! ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, p, str, end, reg->options, 1)) {
4115 p = onigenc_get_prev_char_head(reg->enc, adjrange, p, end);
4116 if (IS_NULL(p)) goto fail;
4117 goto retry;
4118 }
4119 break;
4120 }
4121 }
4122
4123 /* no needs to adjust *high, *high is used as range check only */
4124 if (reg->dmax != ONIG_INFINITE_DISTANCE) {
4125 *low = p - reg->dmax;
4126 *high = p - reg->dmin;
4127 *high = onigenc_get_right_adjust_char_head(reg->enc, adjrange, *high, end);
4128 }
4129
4130#ifdef ONIG_DEBUG_SEARCH
4131 fprintf(stderr, "backward_search_range: low: %d, high: %d\n",
4132 (int )(*low - str), (int )(*high - str));
4133#endif
4134 return 1; /* success */
4135 }
4136
4137 fail:
4138#ifdef ONIG_DEBUG_SEARCH
4139 fprintf(stderr, "backward_search_range: fail.\n");
4140#endif
4141 return 0; /* fail */
4142}
4143
4144
/* Search [str, end) for a match of `reg`, trying start positions from
 * `start` towards `range`.  This is onig_search_gpos() with `start` also
 * passed as the global position; the return value is whatever
 * onig_search_gpos() returns. */
extern OnigPosition
onig_search(regex_t* reg, const UChar* str, const UChar* end,
            const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
  return onig_search_gpos(reg, str, end, start, start, range, region, option);
}
4151
/* Search [str, end) for a match of `reg`.
 *
 * Start positions are tried from `start` towards `range`: forward when
 * range > start, backward otherwise.  `global_pos` is handed to
 * MATCH_ARG_INIT and is also used to narrow the range for patterns anchored
 * with ANCHOR_BEGIN_POSITION.
 *
 * reg        : compiled regex.
 * str, end   : subject string.
 * global_pos : global position passed through to the matcher.
 * start      : first position to try; must satisfy str <= start <= end,
 *              otherwise the result is ONIG_MISMATCH.
 * range      : position at which the search stops.
 * region     : optional capture region; resized and cleared on entry.
 * option     : search-time options.
 *
 * Returns the offset (s - str) of the match start on success,
 * ONIG_MISMATCH when nothing matches, or another negative value propagated
 * from onig_region_resize_clear() / match_at() on error.
 */
extern OnigPosition
onig_search_gpos(regex_t* reg, const UChar* str, const UChar* end,
                 const UChar* global_pos,
                 const UChar* start, const UChar* range, OnigRegion* region, OnigOptionType option)
{
  ptrdiff_t r;
  UChar *s, *prev;
  OnigMatchArg msa;
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
  /* start/range may be rewritten by the anchor optimization below; keep the
     caller's values for the upper_range argument of match_at(). */
  const UChar *orig_start = start;
  const UChar *orig_range = range;
#endif

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr,
     "onig_search (entry point): str: %"PRIuPTR" (%p), end: %"PRIuPTR", start: %"PRIuPTR", range: %"PRIuPTR"\n",
     (uintptr_t )str, str, end - str, start - str, range - str);
#endif

  if (region) {
    r = onig_region_resize_clear(region, reg->num_mem + 1);
    if (r) goto finish_no_msa;
  }

  if (start > end || start < str) goto mismatch_no_msa;


  /* MATCH_AND_RETURN_CHECK runs match_at() at the current position.  It
     relies on the locals reg, str, end, s, prev, msa and r, and jumps to the
     labels `match` (success) or `finish` (error).  With
     USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE a successful match does not stop
     the scan when the FIND_LONGEST option is set. */
#ifdef USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#  define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! IS_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
# else
#  define MATCH_AND_RETURN_CHECK(upper_range) \
  r = match_at(reg, str, end, (upper_range), s, prev, &msa); \
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#else
# ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
#  define MATCH_AND_RETURN_CHECK(none) \
  r = match_at(reg, str, end, s, prev, &msa);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      if (! IS_FIND_LONGEST(reg->options)) {\
        goto match;\
      }\
    }\
    else goto finish; /* error */ \
  }
# else
#  define MATCH_AND_RETURN_CHECK(none) \
  r = match_at(reg, str, end, s, prev, &msa);\
  if (r != ONIG_MISMATCH) {\
    if (r >= 0) {\
      goto match;\
    }\
    else goto finish; /* error */ \
  }
# endif /* USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE */
#endif /* USE_MATCH_RANGE_MUST_BE_INSIDE_OF_SPECIFIED_RANGE */


  /* anchor optimize: resume search range */
  if (reg->anchor != 0 && str < end) {
    UChar *min_semi_end, *max_semi_end;

    if (reg->anchor & ANCHOR_BEGIN_POSITION) {
      /* search start-position only */
    begin_position:
      if (range > start)
      {
        if (global_pos > start)
        {
          if (global_pos < range)
            range = global_pos + 1;
        }
        else
          range = start + 1;
      }
      else
        range = start;
    }
    else if (reg->anchor & ANCHOR_BEGIN_BUF) {
      /* search str-position only */
      if (range > start) {
        if (start != str) goto mismatch_no_msa;
        range = str + 1;
      }
      else {
        if (range <= str) {
          start = str;
          range = str;
        }
        else
          goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCHOR_END_BUF) {
      min_semi_end = max_semi_end = (UChar* )end;

      /* Also entered from the ANCHOR_SEMI_END_BUF branch below, with
         min_semi_end/max_semi_end already set.  Narrows start/range using
         the anchor distances anchor_dmin/anchor_dmax measured from those
         end positions. */
    end_buf:
      if ((OnigDistance )(max_semi_end - str) < reg->anchor_dmin)
        goto mismatch_no_msa;

      if (range > start) {
        if ((OnigDistance )(min_semi_end - start) > reg->anchor_dmax) {
          start = min_semi_end - reg->anchor_dmax;
          if (start < end)
            start = onigenc_get_right_adjust_char_head(reg->enc, str, start, end);
        }
        if ((OnigDistance )(max_semi_end - (range - 1)) < reg->anchor_dmin) {
          range = max_semi_end - reg->anchor_dmin + 1;
        }

        if (start > range) goto mismatch_no_msa;
        /* If start == range, match with empty at end.
           Backward search is used. */
      }
      else {
        if ((OnigDistance )(min_semi_end - range) > reg->anchor_dmax) {
          range = min_semi_end - reg->anchor_dmax;
        }
        if ((OnigDistance )(max_semi_end - start) < reg->anchor_dmin) {
          start = max_semi_end - reg->anchor_dmin;
          start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, start, end);
        }
        if (range > start) goto mismatch_no_msa;
      }
    }
    else if (reg->anchor & ANCHOR_SEMI_END_BUF) {
      UChar* pre_end = ONIGENC_STEP_BACK(reg->enc, str, end, end, 1);

      max_semi_end = (UChar* )end;
      if (ONIGENC_IS_MBC_NEWLINE(reg->enc, pre_end, end)) {
        /* The string ends with a newline: the anchor may also sit just
           before it. */
        min_semi_end = pre_end;

#ifdef USE_CRNL_AS_LINE_TERMINATOR
        pre_end = ONIGENC_STEP_BACK(reg->enc, str, pre_end, end, 1);
        if (IS_NOT_NULL(pre_end) &&
            IS_NEWLINE_CRLF(reg->options) &&
            ONIGENC_IS_MBC_CRNL(reg->enc, pre_end, end)) {
          min_semi_end = pre_end;
        }
#endif
        if (min_semi_end > str && start <= min_semi_end) {
          goto end_buf;
        }
      }
      else {
        min_semi_end = (UChar* )end;
        goto end_buf;
      }
    }
    else if ((reg->anchor & ANCHOR_ANYCHAR_STAR_ML)) {
      goto begin_position;
    }
  }
  else if (str == end) { /* empty string */
    static const UChar address_for_empty_string[] = "";

#ifdef ONIG_DEBUG_SEARCH
    fprintf(stderr, "onig_search: empty string.\n");
#endif

    if (reg->threshold_len == 0) {
      /* Run a single match attempt against a private empty string. */
      start = end = str = address_for_empty_string;
      s = (UChar* )start;
      prev = (UChar* )NULL;

      MATCH_ARG_INIT(msa, option, region, start, start);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
      msa.state_check_buff = (void* )0;
      msa.state_check_buff_size = 0;  /* NO NEED, for valgrind */
#endif
      MATCH_AND_RETURN_CHECK(end);
      goto mismatch;
    }
    goto mismatch_no_msa;
  }

#ifdef ONIG_DEBUG_SEARCH
  fprintf(stderr, "onig_search(apply anchor): end: %d, start: %d, range: %d\n",
          (int )(end - str), (int )(start - str), (int )(range - str));
#endif

  /* From here on msa is initialized: exits must go through `mismatch`,
     `finish` or `match`, which call MATCH_ARG_FREE. */
  MATCH_ARG_INIT(msa, option, region, start, global_pos);
#ifdef USE_COMBINATION_EXPLOSION_CHECK
  {
    int offset = (MIN(start, range) - str);
    STATE_CHECK_BUFF_INIT(msa, end - str, offset, reg->num_comb_exp_check);
  }
#endif

  s = (UChar* )start;
  if (range > start) {   /* forward search */
    if (s > str)
      prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
    else
      prev = (UChar* )NULL;

    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
      UChar *sch_range, *low, *high, *low_prev;

      /* The optimization hit may lie up to dmax bytes past the match
         start, so widen the range handed to forward_search_range(). */
      sch_range = (UChar* )range;
      if (reg->dmax != 0) {
        if (reg->dmax == ONIG_INFINITE_DISTANCE)
          sch_range = (UChar* )end;
        else {
          sch_range += reg->dmax;
          if (sch_range > end) sch_range = (UChar* )end;
        }
      }

      if ((end - start) < reg->threshold_len)
        goto mismatch;

      if (reg->dmax != ONIG_INFINITE_DISTANCE) {
        /* Bounded distance: only positions in [low, high] are tried, then
           the optimized search is repeated from where the scan stopped. */
        do {
          if (! forward_search_range(reg, str, end, s, sch_range,
                                     &low, &high, &low_prev)) goto mismatch;
          if (s < low) {
            s    = low;
            prev = low_prev;
          }
          while (s <= high) {
            MATCH_AND_RETURN_CHECK(orig_range);
            prev = s;
            s += enclen(reg->enc, s, end);
          }
        } while (s < range);
        goto mismatch;
      }
      else { /* check only. */
        if (! forward_search_range(reg, str, end, s, sch_range,
                                   &low, &high, (UChar** )NULL)) goto mismatch;

        if ((reg->anchor & ANCHOR_ANYCHAR_STAR) != 0) {
          do {
            MATCH_AND_RETURN_CHECK(orig_range);
            prev = s;
            s += enclen(reg->enc, s, end);

            /* Unless look-behind / negative look-ahead anchors are
               present, skip ahead to the position after the next newline
               before trying again. */
            if ((reg->anchor & (ANCHOR_LOOK_BEHIND | ANCHOR_PREC_READ_NOT)) == 0) {
              while (!ONIGENC_IS_MBC_NEWLINE_EX(reg->enc, prev, str, end, reg->options, 0)
                  && s < range) {
                prev = s;
                s += enclen(reg->enc, s, end);
              }
            }
          } while (s < range);
          goto mismatch;
        }
      }
    }

    /* Plain scan: try every character position in [s, range). */
    do {
      MATCH_AND_RETURN_CHECK(orig_range);
      prev = s;
      s += enclen(reg->enc, s, end);
    } while (s < range);

    if (s == range) { /* because empty match with /$/. */
      MATCH_AND_RETURN_CHECK(orig_range);
    }
  }
  else {  /* backward search */
    if (reg->optimize != ONIG_OPTIMIZE_NONE) {
      UChar *low, *high, *adjrange, *sch_start;

      if (range < end)
        adjrange = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc, str, range, end);
      else
        adjrange = (UChar* )end;

      if (reg->dmax != ONIG_INFINITE_DISTANCE &&
          (end - range) >= reg->threshold_len) {
        /* Bounded distance: only positions in [low, high] are tried, then
           the optimized search is repeated from where the scan stopped. */
        do {
          sch_start = s + reg->dmax;
          if (sch_start > end) sch_start = (UChar* )end;
          if (backward_search_range(reg, str, end, sch_start, range, adjrange,
                                    &low, &high) <= 0)
            goto mismatch;

          if (s > high)
            s = high;

          while (s >= low) {
            prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
            MATCH_AND_RETURN_CHECK(orig_start);
            s = prev;
          }
        } while (s >= range);
        goto mismatch;
      }
      else { /* check only. */
        if ((end - range) < reg->threshold_len) goto mismatch;

        sch_start = s;
        if (reg->dmax != 0) {
          if (reg->dmax == ONIG_INFINITE_DISTANCE)
            sch_start = (UChar* )end;
          else {
            sch_start += reg->dmax;
            if (sch_start > end) sch_start = (UChar* )end;
            else
              sch_start = ONIGENC_LEFT_ADJUST_CHAR_HEAD(reg->enc,
                                                    start, sch_start, end);
          }
        }
        if (backward_search_range(reg, str, end, sch_start, range, adjrange,
                                  &low, &high) <= 0) goto mismatch;
      }
    }

    /* Plain scan: try every character position from s down to range. */
    do {
      prev = onigenc_get_prev_char_head(reg->enc, str, s, end);
      MATCH_AND_RETURN_CHECK(orig_start);
      s = prev;
    } while (s >= range);
  }

 mismatch:
#ifdef USE_FIND_LONGEST_SEARCH_ALL_OF_RANGE
  /* With FIND_LONGEST the scan never stops at the first match; report the
     best one recorded in msa, if any. */
  if (IS_FIND_LONGEST(reg->options)) {
    if (msa.best_len >= 0) {
      s = msa.best_s;
      goto match;
    }
  }
#endif
  r = ONIG_MISMATCH;

 finish:
  MATCH_ARG_FREE(msa);

  /* If result is mismatch and no FIND_NOT_EMPTY option,
     then the region is not set in match_at(). */
  if (IS_FIND_NOT_EMPTY(reg->options) && region) {
    onig_region_clear(region);
  }

#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
#endif
  return r;

  /* Exits taken before msa has been initialized: nothing to free. */
 mismatch_no_msa:
  r = ONIG_MISMATCH;
 finish_no_msa:
#ifdef ONIG_DEBUG
  if (r != ONIG_MISMATCH)
    fprintf(stderr, "onig_search: error %"PRIdPTRDIFF"\n", r);
#endif
  return r;

 match:
  MATCH_ARG_FREE(msa);
  return s - str;
}
4524
4525extern OnigPosition
4526onig_scan(regex_t* reg, const UChar* str, const UChar* end,
4527 OnigRegion* region, OnigOptionType option,
4528 int (*scan_callback)(OnigPosition, OnigPosition, OnigRegion*, void*),
4529 void* callback_arg)
4530{
4531 OnigPosition r;
4532 OnigPosition n;
4533 int rs;
4534 const UChar* start;
4535
4536 n = 0;
4537 start = str;
4538 while (1) {
4539 r = onig_search(reg, str, end, start, end, region, option);
4540 if (r >= 0) {
4541 rs = scan_callback(n, r, region, callback_arg);
4542 n++;
4543 if (rs != 0)
4544 return rs;
4545
4546 if (region->end[0] == start - str) {
4547 if (start >= end) break;
4548 start += enclen(reg->enc, start, end);
4549 }
4550 else
4551 start = str + region->end[0];
4552
4553 if (start > end)
4554 break;
4555 }
4556 else if (r == ONIG_MISMATCH) {
4557 break;
4558 }
4559 else { /* error */
4560 return r;
4561 }
4562 }
4563
4564 return n;
4565}
4566
/* Return the encoding stored in the compiled regex (reg->enc). */
extern OnigEncoding
onig_get_encoding(const regex_t* reg)
{
  return reg->enc;
}
4572
/* Return the option flags stored in the compiled regex (reg->options). */
extern OnigOptionType
onig_get_options(const regex_t* reg)
{
  return reg->options;
}
4578
/* Return the case-fold flag stored in the compiled regex
   (reg->case_fold_flag). */
extern OnigCaseFoldType
onig_get_case_fold_flag(const regex_t* reg)
{
  return reg->case_fold_flag;
}
4584
/* Return the syntax definition pointer stored in the compiled regex
   (reg->syntax). */
extern const OnigSyntaxType*
onig_get_syntax(const regex_t* reg)
{
  return reg->syntax;
}
4590
/* Return reg->num_mem, the capture-group count recorded in the compiled
   regex. */
extern int
onig_number_of_captures(const regex_t* reg)
{
  return reg->num_mem;
}
4596
4597extern int
4598onig_number_of_capture_histories(const regex_t* reg)
4599{
4600#ifdef USE_CAPTURE_HISTORY
4601 int i, n;
4602
4603 n = 0;
4604 for (i = 0; i <= ONIG_MAX_CAPTURE_HISTORY_GROUP; i++) {
4605 if (BIT_STATUS_AT(reg->capture_history, i) != 0)
4606 n++;
4607 }
4608 return n;
4609#else
4610 return 0;
4611#endif
4612}
4613
/* Copy the encoding descriptor that `from` points to into `*to` by plain
   struct assignment. */
extern void
onig_copy_encoding(OnigEncodingType *to, OnigEncoding from)
{
  *to = *from;
}
4619
Note: See TracBrowser for help on using the repository browser.