Changeset 331 for EcnlProtoTool/trunk/onigmo-6.1.3/src/regparse.c
- Timestamp:
- Jan 21, 2018, 12:10:09 AM (6 years ago)
- Location:
- EcnlProtoTool/trunk/onigmo-6.1.3
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
EcnlProtoTool/trunk/onigmo-6.1.3/src/regparse.c
r321 r331 4 4 /*- 5 5 * Copyright (c) 2002-2008 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * Copyright (c) 2011-201 4K.Takata <kentkt AT csc DOT jp>6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp> 7 7 * All rights reserved. 8 8 * … … 30 30 31 31 #include "regparse.h" 32 #include "st.h"32 #include <stdarg.h> 33 33 34 34 #define WARN_BUFSIZE 256 … … 37 37 38 38 39 OnigSyntaxType OnigSyntaxRuby = {39 const OnigSyntaxType OnigSyntaxRuby = { 40 40 (( SYN_GNU_REGEX_OP | ONIG_SYN_OP_QMARK_NON_GREEDY | 41 41 ONIG_SYN_OP_ESC_OCTAL3 | ONIG_SYN_OP_ESC_X_HEX2 | … … 53 53 ONIG_SYN_OP2_ESC_CAPITAL_M_BAR_META | ONIG_SYN_OP2_ESC_V_VTAB | 54 54 ONIG_SYN_OP2_ESC_H_XDIGIT | 55 #ifndef RUBY 56 ONIG_SYN_OP2_ESC_U_HEX4 | 57 #endif 55 58 ONIG_SYN_OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER | 56 59 ONIG_SYN_OP2_QMARK_LPAREN_CONDITION | 57 60 ONIG_SYN_OP2_ESC_CAPITAL_R_LINEBREAK | 58 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP ) 61 ONIG_SYN_OP2_ESC_CAPITAL_K_KEEP | 62 ONIG_SYN_OP2_QMARK_TILDE_ABSENT ) 59 63 , ( SYN_GNU_REGEX_BV | 60 64 ONIG_SYN_ALLOW_INTERVAL_LOW_ABBREV | … … 64 68 ONIG_SYN_FIXED_INTERVAL_IS_GREEDY_ONLY | 65 69 ONIG_SYN_WARN_CC_OP_NOT_ESCAPED | 70 ONIG_SYN_WARN_CC_DUP | 66 71 ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT ) 67 72 , ( ONIG_OPTION_ASCII_RANGE | ONIG_OPTION_POSIX_BRACKET_ALL_RANGE | … … 78 83 }; 79 84 80 OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY;85 const OnigSyntaxType* OnigDefaultSyntax = ONIG_SYNTAX_RUBY; 81 86 82 87 extern void onig_null_warn(const char* s ARG_UNUSED) { } … … 103 108 onig_verb_warn = f; 104 109 } 110 111 static void CC_DUP_WARN(ScanEnv *env); 112 113 114 static unsigned int ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; 115 116 extern unsigned int 117 onig_get_parse_depth_limit(void) 118 { 119 return ParseDepthLimit; 120 } 121 122 extern int 123 onig_set_parse_depth_limit(unsigned int depth) 124 { 125 if (depth == 0) 126 ParseDepthLimit = DEFAULT_PARSE_DEPTH_LIMIT; 127 else 128 ParseDepthLimit = depth; 129 return 0; 130 } 131 105 132 106 
133 static void … … 137 164 138 165 #define SET_ALL_MULTI_BYTE_RANGE(enc, pbuf) \ 139 add_code_range_to_buf(pbuf, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT)166 add_code_range_to_buf(pbuf, env, MBCODE_START_POS(enc), ONIG_LAST_CODE_POINT) 140 167 141 168 #define ADD_ALL_MULTI_BYTE_RANGE(enc, mbuf) do {\ … … 146 173 } while (0) 147 174 175 176 #define BITSET_SET_BIT_CHKDUP(bs, pos) do { \ 177 if (BITSET_AT(bs, pos)) CC_DUP_WARN(env); \ 178 BS_ROOM(bs, pos) |= BS_BIT(pos); \ 179 } while (0) 148 180 149 181 #define BITSET_IS_EMPTY(bs,empty) do {\ … … 158 190 159 191 static void 160 bitset_set_range( BitSetRef bs, int from, int to)192 bitset_set_range(ScanEnv *env, BitSetRef bs, int from, int to) 161 193 { 162 194 int i; 163 195 for (i = from; i <= to && i < SINGLE_BYTE_SIZE; i++) { 164 BITSET_SET_BIT (bs, i);196 BITSET_SET_BIT_CHKDUP(bs, i); 165 197 } 166 198 } … … 210 242 } 211 243 244 #if defined(USE_NAMED_GROUP) && !defined(USE_ST_LIBRARY) 212 245 extern int 213 246 onig_strncmp(const UChar* s1, const UChar* s2, int n) … … 221 254 return 0; 222 255 } 256 #endif 223 257 224 258 extern void … … 259 293 #ifdef __GNUC__ 260 294 /* get rid of Wunused-but-set-variable and Wuninitialized */ 261 # define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev295 # define PFETCH_READY UChar* pfetch_prev = NULL; (void)pfetch_prev 262 296 #else 263 # define PFETCH_READY UChar* pfetch_prev297 # define PFETCH_READY UChar* pfetch_prev 264 298 #endif 265 299 #define PEND (p < end ? 0 : 1) … … 267 301 #define PINC do { \ 268 302 pfetch_prev = p; \ 269 p += ONIGENC_MBC_ENC_LEN(enc, p); \303 p += enclen(enc, p, end); \ 270 304 } while (0) 271 305 #define PFETCH(c) do { \ 272 c = ONIGENC_MBC_TO_CODE(enc, p, end); \306 c = ((enc->max_enc_len == 1) ? 
*p : ONIGENC_MBC_TO_CODE(enc, p, end)); \ 273 307 pfetch_prev = p; \ 274 p += ONIGENC_MBC_ENC_LEN(enc, p); \308 p += enclen(enc, p, end); \ 275 309 } while (0) 276 310 277 311 #define PINC_S do { \ 278 p += ONIGENC_MBC_ENC_LEN(enc, p); \312 p += enclen(enc, p, end); \ 279 313 } while (0) 280 314 #define PFETCH_S(c) do { \ 281 c = ONIGENC_MBC_TO_CODE(enc, p, end); \282 p += ONIGENC_MBC_ENC_LEN(enc, p); \315 c = ((enc->max_enc_len == 1) ? *p : ONIGENC_MBC_TO_CODE(enc, p, end)); \ 316 p += enclen(enc, p, end); \ 283 317 } while (0) 284 318 … … 319 353 #ifdef USE_ST_LIBRARY 320 354 355 # ifdef RUBY 356 # include "ruby/st.h" 357 # else 358 # include "st.h" 359 # endif 360 321 361 typedef struct { 322 UChar* s;323 UChar* end;362 const UChar* s; 363 const UChar* end; 324 364 } st_str_end_key; 325 365 326 366 static int 327 str_end_cmp(st_str_end_key* x, st_str_end_key* y) 328 { 329 UChar *p, *q; 367 str_end_cmp(st_data_t xp, st_data_t yp) 368 { 369 const st_str_end_key *x, *y; 370 const UChar *p, *q; 330 371 int c; 331 372 373 x = (const st_str_end_key *)xp; 374 y = (const st_str_end_key *)yp; 332 375 if ((x->end - x->s) != (y->end - y->s)) 333 376 return 1; … … 345 388 } 346 389 347 static int 348 str_end_hash(st_str_end_key* x) 349 { 350 UChar *p; 351 int val = 0; 390 static st_index_t 391 str_end_hash(st_data_t xp) 392 { 393 const st_str_end_key *x = (const st_str_end_key *)xp; 394 const UChar *p; 395 st_index_t val = 0; 352 396 353 397 p = x->s; … … 360 404 361 405 extern hash_table_type* 362 onig_st_init_strend_table_with_size( int size)363 { 364 static struct st_hash_type hashType = {406 onig_st_init_strend_table_with_size(st_index_t size) 407 { 408 static const struct st_hash_type hashType = { 365 409 str_end_cmp, 366 410 str_end_hash, … … 405 449 #ifdef USE_NAMED_GROUP 406 450 407 # define INIT_NAME_BACKREFS_ALLOC_NUM 8451 # define INIT_NAME_BACKREFS_ALLOC_NUM 8 408 452 409 453 typedef struct { … … 416 460 } NameEntry; 417 461 418 # ifdef USE_ST_LIBRARY462 # ifdef 
USE_ST_LIBRARY 419 463 420 464 typedef st_table NameTable; 421 465 typedef st_data_t HashDataType; /* 1.6 st.h doesn't define st_data_t type */ 422 466 423 # ifdef ONIG_DEBUG467 # ifdef ONIG_DEBUG 424 468 static int 425 469 i_print_name_entry(UChar* key, NameEntry* e, void* arg) … … 455 499 return 0; 456 500 } 457 # endif /* ONIG_DEBUG */501 # endif /* ONIG_DEBUG */ 458 502 459 503 static int … … 518 562 { 519 563 int r = (*(arg->func))(e->name, 520 521 564 e->name + e->name_len, 565 e->back_num, 522 566 (e->back_num > 1 ? e->back_refs : &(e->back_ref1)), 523 567 arg->reg, arg->arg); … … 577 621 578 622 extern int 579 onig_number_of_names( regex_t* reg)623 onig_number_of_names(const regex_t* reg) 580 624 { 581 625 NameTable* t = (NameTable* )reg->name_table; 582 626 583 627 if (IS_NOT_NULL(t)) 584 return t->num_entries;628 return (int )t->num_entries; 585 629 else 586 630 return 0; 587 631 } 588 632 589 # else /* USE_ST_LIBRARY */590 591 # define INIT_NAMES_ALLOC_NUM 8633 # else /* USE_ST_LIBRARY */ 634 635 # define INIT_NAMES_ALLOC_NUM 8 592 636 593 637 typedef struct { … … 597 641 } NameTable; 598 642 599 # ifdef ONIG_DEBUG643 # ifdef ONIG_DEBUG 600 644 extern int 601 645 onig_print_names(FILE* fp, regex_t* reg) … … 628 672 return 0; 629 673 } 630 # endif674 # endif 631 675 632 676 static int … … 713 757 714 758 extern int 715 onig_number_of_names( regex_t* reg)759 onig_number_of_names(const regex_t* reg) 716 760 { 717 761 NameTable* t = (NameTable* )reg->name_table; … … 723 767 } 724 768 725 # endif /* else USE_ST_LIBRARY */769 # endif /* else USE_ST_LIBRARY */ 726 770 727 771 static int … … 737 781 e = name_find(reg, name, name_end); 738 782 if (IS_NULL(e)) { 739 # ifdef USE_ST_LIBRARY783 # ifdef USE_ST_LIBRARY 740 784 if (IS_NULL(t)) { 741 785 t = onig_st_init_strend_table_with_size(5); … … 758 802 e->back_refs = (int* )NULL; 759 803 760 # else804 # else 761 805 762 806 if (IS_NULL(t)) { … … 801 845 if (IS_NULL(e->name)) return ONIGERR_MEMORY; 802 846 
e->name_len = name_end - name; 803 # endif847 # endif 804 848 } 805 849 … … 864 908 extern int 865 909 onig_name_to_backref_number(regex_t* reg, const UChar* name, 866 const UChar* name_end, OnigRegion *region)910 const UChar* name_end, const OnigRegion *region) 867 911 { 868 912 int i, n, *nums; … … 897 941 extern int 898 942 onig_name_to_backref_number(regex_t* reg, const UChar* name, 899 const UChar* name_end, OnigRegion* region)943 const UChar* name_end, const OnigRegion* region) 900 944 { 901 945 return ONIG_NO_SUPPORT_CONFIG; … … 910 954 911 955 extern int 912 onig_number_of_names( regex_t* reg)956 onig_number_of_names(const regex_t* reg) 913 957 { 914 958 return 0; … … 917 961 918 962 extern int 919 onig_noname_group_capture_is_active( regex_t* reg)963 onig_noname_group_capture_is_active(const regex_t* reg) 920 964 { 921 965 if (ONIG_IS_OPTION_ON(reg->options, ONIG_OPTION_DONT_CAPTURE_GROUP)) … … 964 1008 env->has_recursion = 0; 965 1009 #endif 1010 env->parse_depth = 0; 1011 env->warnings_flag = 0; 966 1012 } 967 1013 … … 980 1026 alloc = INIT_SCANENV_MEMNODES_ALLOC_SIZE; 981 1027 p = (Node** )xmalloc(sizeof(Node*) * alloc); 1028 CHECK_NULL_RETURN_MEMERR(p); 982 1029 xmemcpy(p, env->mem_nodes_static, 983 1030 sizeof(Node*) * SCANENV_MEMNODES_SIZE); … … 986 1033 alloc = env->mem_alloc * 2; 987 1034 p = (Node** )xrealloc(env->mem_nodes_dynamic, sizeof(Node*) * alloc); 988 } 989 CHECK_NULL_RETURN_MEMERR(p);1035 CHECK_NULL_RETURN_MEMERR(p); 1036 } 990 1037 991 1038 for (i = env->num_mem + 1; i < alloc; i++) … … 1012 1059 1013 1060 1014 #ifdef USE_PARSE_TREE_NODE_RECYCLE1015 typedef struct _FreeNode {1016 struct _FreeNode* next;1017 } FreeNode;1018 1019 static FreeNode* FreeNodeList = (FreeNode* )NULL;1020 #endif1021 1022 1061 extern void 1023 1062 onig_node_free(Node* node) … … 1040 1079 Node* next_node = NCDR(node); 1041 1080 1042 #ifdef USE_PARSE_TREE_NODE_RECYCLE1043 {1044 FreeNode* n = (FreeNode* )node;1045 1046 THREAD_ATOMIC_START;1047 n->next = 
FreeNodeList;1048 FreeNodeList = n;1049 THREAD_ATOMIC_END;1050 }1051 #else1052 1081 xfree(node); 1053 #endif1054 1082 node = next_node; 1055 1083 goto start; … … 1061 1089 CClassNode* cc = NCCLASS(node); 1062 1090 1063 if (IS_NCCLASS_SHARE(cc)) return ;1064 1091 if (cc->mbuf) 1065 1092 bbuf_free(cc->mbuf); 1066 1093 } 1067 1094 break; … … 1088 1115 } 1089 1116 1090 #ifdef USE_PARSE_TREE_NODE_RECYCLE1091 {1092 FreeNode* n = (FreeNode* )node;1093 1094 THREAD_ATOMIC_START;1095 n->next = FreeNodeList;1096 FreeNodeList = n;1097 THREAD_ATOMIC_END;1098 }1099 #else1100 1117 xfree(node); 1101 #endif 1102 } 1103 1104 #ifdef USE_PARSE_TREE_NODE_RECYCLE 1105 extern int 1106 onig_free_node_list(void) 1107 { 1108 FreeNode* n; 1109 1110 /* THREAD_ATOMIC_START; */ 1111 while (IS_NOT_NULL(FreeNodeList)) { 1112 n = FreeNodeList; 1113 FreeNodeList = FreeNodeList->next; 1114 xfree(n); 1115 } 1116 /* THREAD_ATOMIC_END; */ 1117 return 0; 1118 } 1119 #endif 1118 } 1120 1119 1121 1120 static Node* … … 1123 1122 { 1124 1123 Node* node; 1125 1126 #ifdef USE_PARSE_TREE_NODE_RECYCLE1127 THREAD_ATOMIC_START;1128 if (IS_NOT_NULL(FreeNodeList)) {1129 node = (Node* )FreeNodeList;1130 FreeNodeList = FreeNodeList->next;1131 THREAD_ATOMIC_END;1132 return node;1133 }1134 THREAD_ATOMIC_END;1135 #endif1136 1124 1137 1125 node = (Node* )xmalloc(sizeof(Node)); … … 1139 1127 return node; 1140 1128 } 1141 1142 #if defined(USE_MULTI_THREAD_SYSTEM) && \1143 defined(USE_SHARED_CCLASS_TABLE) && \1144 defined(USE_PARSE_TREE_NODE_RECYCLE)1145 static Node*1146 node_new_locked(void)1147 {1148 Node* node;1149 1150 if (IS_NOT_NULL(FreeNodeList)) {1151 node = (Node* )FreeNodeList;1152 FreeNodeList = FreeNodeList->next;1153 return node;1154 }1155 1156 node = (Node* )xmalloc(sizeof(Node));1157 /* xmemset(node, 0, sizeof(Node)); */1158 return node;1159 }1160 #endif1161 1129 1162 1130 static void … … 1179 1147 return node; 1180 1148 } 1181 1182 #if defined(USE_MULTI_THREAD_SYSTEM) && \1183 
defined(USE_SHARED_CCLASS_TABLE) && \1184 defined(USE_PARSE_TREE_NODE_RECYCLE)1185 static Node*1186 node_new_cclass_locked(void)1187 {1188 Node* node = node_new_locked();1189 CHECK_NULL_RETURN(node);1190 1191 SET_NTYPE(node, NT_CCLASS);1192 initialize_cclass(NCCLASS(node));1193 return node;1194 }1195 #else1196 #define node_new_cclass_locked() node_new_cclass()1197 #endif1198 1199 #ifdef USE_SHARED_CCLASS_TABLE1200 static Node*1201 node_new_cclass_by_codepoint_range(int not, OnigCodePoint sb_out,1202 const OnigCodePoint ranges[])1203 {1204 int n, i;1205 CClassNode* cc;1206 OnigCodePoint j;1207 1208 Node* node = node_new_cclass_locked();1209 CHECK_NULL_RETURN(node);1210 1211 cc = NCCLASS(node);1212 if (not != 0) NCCLASS_SET_NOT(cc);1213 1214 BITSET_CLEAR(cc->bs);1215 if (sb_out > 0 && IS_NOT_NULL(ranges)) {1216 n = ONIGENC_CODE_RANGE_NUM(ranges);1217 for (i = 0; i < n; i++) {1218 for (j = ONIGENC_CODE_RANGE_FROM(ranges, i);1219 j <= (OnigCodePoint )ONIGENC_CODE_RANGE_TO(ranges, i); j++) {1220 if (j >= sb_out) goto sb_end;1221 1222 BITSET_SET_BIT(cc->bs, j);1223 }1224 }1225 }1226 1227 sb_end:1228 if (IS_NULL(ranges)) {1229 is_null:1230 cc->mbuf = NULL;1231 }1232 else {1233 BBuf* bbuf;1234 1235 n = ONIGENC_CODE_RANGE_NUM(ranges);1236 if (n == 0) goto is_null;1237 1238 bbuf = (BBuf* )xmalloc(sizeof(BBuf));1239 CHECK_NULL_RETURN(bbuf);1240 bbuf->alloc = n + 1;1241 bbuf->used = n + 1;1242 bbuf->p = (UChar* )((void* )ranges);1243 1244 cc->mbuf = bbuf;1245 }1246 1247 return node;1248 }1249 #endif /* USE_SHARED_CCLASS_TABLE */1250 1149 1251 1150 static Node* … … 1535 1434 } 1536 1435 1436 #if 0 1537 1437 extern void 1538 1438 onig_node_conv_to_str_node(Node* node, int flag) … … 1544 1444 NSTR(node)->end = NSTR(node)->buf; 1545 1445 } 1446 #endif 1546 1447 1547 1448 extern void … … 1614 1515 1615 1516 if (sn->end > sn->s) { 1616 p = onigenc_get_prev_char_head(enc, sn->s, sn->end );1517 p = onigenc_get_prev_char_head(enc, sn->s, sn->end, sn->end); 1617 1518 if (p && p > sn->s) 
{ /* can be split. */ 1618 1519 n = node_new_str(p, sn->end); … … 1629 1530 { 1630 1531 if (sn->end > sn->s) { 1631 return ((enclen(enc, sn->s ) < sn->end - sn->s) ? 1 : 0);1532 return ((enclen(enc, sn->s, sn->end) < sn->end - sn->s) ? 1 : 0); 1632 1533 } 1633 1534 return 0; … … 1702 1603 else { 1703 1604 PUNFETCH; 1605 maxlen++; 1704 1606 break; 1705 1607 } … … 1766 1668 1767 1669 static int 1768 add_code_range_to_buf(BBuf** pbuf, OnigCodePoint from, OnigCodePoint to) 1670 add_code_range_to_buf0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, 1671 int checkdup) 1769 1672 { 1770 1673 int r, inc_n, pos; … … 1807 1710 bound = x; 1808 1711 } 1712 /* data[(low-1)*2+1] << from <= data[low*2] 1713 * data[(high-1)*2+1] <= to << data[high*2] 1714 */ 1809 1715 1810 1716 inc_n = low + 1 - high; … … 1813 1719 1814 1720 if (inc_n != 1) { 1721 if (checkdup && from <= data[low*2+1] 1722 && (data[low*2] <= from || data[low*2+1] <= to)) 1723 CC_DUP_WARN(env); 1815 1724 if (from > data[low*2]) 1816 1725 from = data[low*2]; … … 1845 1754 1846 1755 static int 1847 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) 1756 add_code_range_to_buf(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) 1757 { 1758 return add_code_range_to_buf0(pbuf, env, from, to, 1); 1759 } 1760 1761 static int 1762 add_code_range0(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to, int checkdup) 1848 1763 { 1849 1764 if (from > to) { … … 1854 1769 } 1855 1770 1856 return add_code_range_to_buf (pbuf, from, to);1771 return add_code_range_to_buf0(pbuf, env, from, to, checkdup); 1857 1772 } 1858 1773 1859 1774 static int 1860 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf) 1775 add_code_range(BBuf** pbuf, ScanEnv* env, OnigCodePoint from, OnigCodePoint to) 1776 { 1777 return add_code_range0(pbuf, env, from, to, 1); 1778 } 1779 1780 static int 1781 not_code_range_buf(OnigEncoding enc, BBuf* bbuf, BBuf** pbuf, ScanEnv* env) 1861 
1782 { 1862 1783 int r, i, n; … … 1880 1801 to = data[i*2+1]; 1881 1802 if (pre <= from - 1) { 1882 r = add_code_range_to_buf(pbuf, pre, from - 1);1803 r = add_code_range_to_buf(pbuf, env, pre, from - 1); 1883 1804 if (r != 0) return r; 1884 1805 } … … 1887 1808 } 1888 1809 if (to < ONIG_LAST_CODE_POINT) { 1889 r = add_code_range_to_buf(pbuf, to + 1, ONIG_LAST_CODE_POINT);1810 r = add_code_range_to_buf(pbuf, env, to + 1, ONIG_LAST_CODE_POINT); 1890 1811 } 1891 1812 return r; … … 1901 1822 static int 1902 1823 or_code_range_buf(OnigEncoding enc, BBuf* bbuf1, int not1, 1903 BBuf* bbuf2, int not2, BBuf** pbuf )1824 BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) 1904 1825 { 1905 1826 int r; … … 1927 1848 } 1928 1849 else { 1929 return not_code_range_buf(enc, bbuf2, pbuf );1850 return not_code_range_buf(enc, bbuf2, pbuf, env); 1930 1851 } 1931 1852 } … … 1943 1864 } 1944 1865 else if (not1 == 0) { /* 1 OR (not 2) */ 1945 r = not_code_range_buf(enc, bbuf2, pbuf );1866 r = not_code_range_buf(enc, bbuf2, pbuf, env); 1946 1867 } 1947 1868 if (r != 0) return r; … … 1950 1871 from = data1[i*2]; 1951 1872 to = data1[i*2+1]; 1952 r = add_code_range_to_buf(pbuf, from, to);1873 r = add_code_range_to_buf(pbuf, env, from, to); 1953 1874 if (r != 0) return r; 1954 1875 } … … 1957 1878 1958 1879 static int 1959 and_code_range1(BBuf** pbuf, OnigCodePoint from1, OnigCodePoint to1,1960 1880 and_code_range1(BBuf** pbuf, ScanEnv* env, OnigCodePoint from1, OnigCodePoint to1, 1881 OnigCodePoint* data, int n) 1961 1882 { 1962 1883 int i, r; … … 1975 1896 if (to2 < to1) { 1976 1897 if (from1 <= from2 - 1) { 1977 r = add_code_range_to_buf(pbuf, from1, from2-1);1898 r = add_code_range_to_buf(pbuf, env, from1, from2-1); 1978 1899 if (r != 0) return r; 1979 1900 } … … 1990 1911 } 1991 1912 if (from1 <= to1) { 1992 r = add_code_range_to_buf(pbuf, from1, to1);1913 r = add_code_range_to_buf(pbuf, env, from1, to1); 1993 1914 if (r != 0) return r; 1994 1915 } … … 1997 1918 1998 1919 static int 1999 
and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf )1920 and_code_range_buf(BBuf* bbuf1, int not1, BBuf* bbuf2, int not2, BBuf** pbuf, ScanEnv* env) 2000 1921 { 2001 1922 int r; … … 2036 1957 from = MAX(from1, from2); 2037 1958 to = MIN(to1, to2); 2038 r = add_code_range_to_buf(pbuf, from, to);1959 r = add_code_range_to_buf(pbuf, env, from, to); 2039 1960 if (r != 0) return r; 2040 1961 } … … 2045 1966 from1 = data1[i*2]; 2046 1967 to1 = data1[i*2+1]; 2047 r = and_code_range1(pbuf, from1, to1, data2, n2);1968 r = and_code_range1(pbuf, env, from1, to1, data2, n2); 2048 1969 if (r != 0) return r; 2049 1970 } … … 2054 1975 2055 1976 static int 2056 and_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) 2057 { 1977 and_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) 1978 { 1979 OnigEncoding enc = env->enc; 2058 1980 int r, not1, not2; 2059 1981 BBuf *buf1, *buf2, *pbuf = 0; … … 2087 2009 if (! ONIGENC_IS_SINGLEBYTE(enc)) { 2088 2010 if (not1 != 0 && not2 != 0) { 2089 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf );2011 r = or_code_range_buf(enc, buf1, 0, buf2, 0, &pbuf, env); 2090 2012 } 2091 2013 else { 2092 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf );2014 r = and_code_range_buf(buf1, not1, buf2, not2, &pbuf, env); 2093 2015 if (r == 0 && not1 != 0) { 2094 2016 BBuf *tbuf = 0; 2095 r = not_code_range_buf(enc, pbuf, &tbuf );2017 r = not_code_range_buf(enc, pbuf, &tbuf, env); 2096 2018 bbuf_free(pbuf); 2097 2019 pbuf = tbuf; … … 2111 2033 2112 2034 static int 2113 or_cclass(CClassNode* dest, CClassNode* cc, OnigEncoding enc) 2114 { 2035 or_cclass(CClassNode* dest, CClassNode* cc, ScanEnv* env) 2036 { 2037 OnigEncoding enc = env->enc; 2115 2038 int r, not1, not2; 2116 2039 BBuf *buf1, *buf2, *pbuf = 0; … … 2144 2067 if (! 
ONIGENC_IS_SINGLEBYTE(enc)) { 2145 2068 if (not1 != 0 && not2 != 0) { 2146 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf );2069 r = and_code_range_buf(buf1, 0, buf2, 0, &pbuf, env); 2147 2070 } 2148 2071 else { 2149 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf );2072 r = or_code_range_buf(enc, buf1, not1, buf2, not2, &pbuf, env); 2150 2073 if (r == 0 && not1 != 0) { 2151 2074 BBuf *tbuf = 0; 2152 r = not_code_range_buf(enc, pbuf, &tbuf );2075 r = not_code_range_buf(enc, pbuf, &tbuf, env); 2153 2076 bbuf_free(pbuf); 2154 2077 pbuf = tbuf; … … 2168 2091 } 2169 2092 2170 static int 2171 conv_backslash_value(int c, ScanEnv* env) 2093 static void UNKNOWN_ESC_WARN(ScanEnv *env, int c); 2094 2095 static OnigCodePoint 2096 conv_backslash_value(OnigCodePoint c, ScanEnv* env) 2172 2097 { 2173 2098 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_CONTROL_CHARS)) { … … 2186 2111 2187 2112 default: 2113 if (('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z')) 2114 UNKNOWN_ESC_WARN(env, c); 2188 2115 break; 2189 2116 } … … 2193 2120 2194 2121 #ifdef USE_NO_INVALID_QUANTIFIER 2195 # define is_invalid_quantifier_target(node) 02122 # define is_invalid_quantifier_target(node) 0 2196 2123 #else 2197 2124 static int … … 2265 2192 2266 2193 static enum ReduceType const ReduceTypeTable[6][6] = { 2194 /* '?', '*', '+', '??', '*?', '+?' p / c */ 2267 2195 {RQ_DEL, RQ_A, RQ_A, RQ_QQ, RQ_AQ, RQ_ASIS}, /* '?' */ 2268 2196 {RQ_DEL, RQ_DEL, RQ_DEL, RQ_P_QQ, RQ_P_QQ, RQ_DEL}, /* '*' */ … … 2467 2395 if (IS_SYNTAX_OP(env->syntax, ONIG_SYN_OP_ESC_BRACE_INTERVAL)) { 2468 2396 if (c != MC_ESC(env->syntax)) goto invalid; 2397 if (PEND) goto invalid; 2469 2398 PFETCH(c); 2470 2399 } … … 2490 2419 /* \M-, \C-, \c, or \... 
*/ 2491 2420 static int 2492 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env )2421 fetch_escaped_value(UChar** src, UChar* end, ScanEnv* env, OnigCodePoint* val) 2493 2422 { 2494 2423 int v; … … 2509 2438 PFETCH_S(c); 2510 2439 if (c == MC_ESC(env->syntax)) { 2511 v = fetch_escaped_value(&p, end, env); 2512 if (v < 0) return v; 2513 c = (OnigCodePoint )v; 2440 v = fetch_escaped_value(&p, end, env, &c); 2441 if (v < 0) return v; 2514 2442 } 2515 2443 c = ((c & 0xff) | 0x80); … … 2535 2463 PFETCH_S(c); 2536 2464 if (c == '?') { 2537 2465 c = 0177; 2538 2466 } 2539 2467 else { 2540 if (c == MC_ESC(env->syntax)) { 2541 v = fetch_escaped_value(&p, end, env); 2542 if (v < 0) return v; 2543 c = (OnigCodePoint )v; 2544 } 2545 c &= 0x9f; 2468 if (c == MC_ESC(env->syntax)) { 2469 v = fetch_escaped_value(&p, end, env, &c); 2470 if (v < 0) return v; 2471 } 2472 c &= 0x9f; 2546 2473 } 2547 2474 break; … … 2558 2485 2559 2486 *src = p; 2560 return c; 2487 *val = c; 2488 return 0; 2561 2489 } 2562 2490 … … 2579 2507 2580 2508 #ifdef USE_NAMED_GROUP 2581 #ifdef USE_BACKREF_WITH_LEVEL 2509 # ifdef RUBY 2510 # define ONIGENC_IS_CODE_NAME(enc, c) TRUE 2511 # else 2512 # define ONIGENC_IS_CODE_NAME(enc, c) ONIGENC_IS_CODE_WORD(enc, c) 2513 # endif 2514 2515 # ifdef USE_BACKREF_WITH_LEVEL 2582 2516 /* 2583 2517 \k<name+n>, \k<name-n> … … 2624 2558 pnum_head = p; 2625 2559 } 2626 else if (!ONIGENC_IS_CODE_ WORD(enc, c)) {2560 else if (!ONIGENC_IS_CODE_NAME(enc, c)) { 2627 2561 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; 2628 2562 } … … 2639 2573 if (is_num != 0) { 2640 2574 if (ONIGENC_IS_CODE_DIGIT(enc, c)) { 2641 2575 is_num = 1; 2642 2576 } 2643 2577 else { 2644 2645 2646 } 2647 } 2648 else if (!ONIGENC_IS_CODE_ WORD(enc, c)) {2578 r = ONIGERR_INVALID_GROUP_NAME; 2579 is_num = 0; 2580 } 2581 } 2582 else if (!ONIGENC_IS_CODE_NAME(enc, c)) { 2649 2583 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; 2650 2584 } … … 2656 2590 int flag = (c == '-' ? 
-1 : 1); 2657 2591 2592 if (PEND) { 2593 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; 2594 goto end; 2595 } 2658 2596 PFETCH(c); 2659 2597 if (! ONIGENC_IS_CODE_DIGIT(enc, c)) goto err; … … 2664 2602 exist_level = 1; 2665 2603 2666 PFETCH(c); 2667 if (c == end_code) 2668 goto end; 2604 if (!PEND) { 2605 PFETCH(c); 2606 if (c == end_code) 2607 goto end; 2608 } 2669 2609 } 2670 2610 … … 2693 2633 } 2694 2634 } 2695 # endif /* USE_BACKREF_WITH_LEVEL */2635 # endif /* USE_BACKREF_WITH_LEVEL */ 2696 2636 2697 2637 /* … … 2730 2670 if (ONIGENC_IS_CODE_DIGIT(enc, c)) { 2731 2671 if (ref == 1) 2732 is_num = 1; 2733 else { 2734 r = ONIGERR_INVALID_GROUP_NAME; 2735 is_num = 0; 2736 } 2737 } 2738 else if (c == '-') { 2739 if (ref == 1) { 2740 is_num = 2; 2741 sign = -1; 2742 pnum_head = p; 2743 } 2672 is_num = 1; 2744 2673 else { 2745 2674 r = ONIGERR_INVALID_GROUP_NAME; … … 2747 2676 } 2748 2677 } 2749 else if (!ONIGENC_IS_CODE_WORD(enc, c)) { 2678 else if (c == '-') { 2679 if (ref == 1) { 2680 is_num = 2; 2681 sign = -1; 2682 pnum_head = p; 2683 } 2684 else { 2685 r = ONIGERR_INVALID_GROUP_NAME; 2686 is_num = 0; 2687 } 2688 } 2689 else if (!ONIGENC_IS_CODE_NAME(enc, c)) { 2750 2690 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; 2751 2691 } … … 2757 2697 PFETCH_S(c); 2758 2698 if (c == end_code || c == ')') { 2759 if (is_num == 2) r = ONIGERR_INVALID_GROUP_NAME; 2699 if (is_num == 2) { 2700 r = ONIGERR_INVALID_GROUP_NAME; 2701 goto teardown; 2702 } 2760 2703 break; 2761 2704 } 2762 2705 2763 2706 if (is_num != 0) { 2764 2765 2766 2767 2768 2769 2770 2771 2772 is_num = 0;2773 2707 if (ONIGENC_IS_CODE_DIGIT(enc, c)) { 2708 is_num = 1; 2709 } 2710 else { 2711 if (!ONIGENC_IS_CODE_WORD(enc, c)) 2712 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; 2713 else 2714 r = ONIGERR_INVALID_GROUP_NAME; 2715 goto teardown; 2716 } 2774 2717 } 2775 2718 else { 2776 if (!ONIGENC_IS_CODE_WORD(enc, c)) { 2777 r = ONIGERR_INVALID_CHAR_IN_GROUP_NAME; 2778 } 2719 if (!ONIGENC_IS_CODE_NAME(enc, c)) { 2720 r = 
ONIGERR_INVALID_CHAR_IN_GROUP_NAME; 2721 goto teardown; 2722 } 2779 2723 } 2780 2724 } … … 2783 2727 r = ONIGERR_INVALID_GROUP_NAME; 2784 2728 name_end = end; 2729 goto err; 2785 2730 } 2786 2731 … … 2789 2734 if (*rback_num < 0) return ONIGERR_TOO_BIG_NUMBER; 2790 2735 else if (*rback_num == 0) { 2791 2792 2736 r = ONIGERR_INVALID_GROUP_NAME; 2737 goto err; 2793 2738 } 2794 2739 … … 2801 2746 } 2802 2747 else { 2748 teardown: 2803 2749 while (!PEND) { 2804 2750 name_end = p; 2805 2751 PFETCH_S(c); 2806 2752 if (c == end_code || c == ')') 2807 2753 break; 2808 2754 } 2809 2755 if (PEND) … … 2894 2840 #endif /* USE_NAMED_GROUP */ 2895 2841 2842 2896 2843 static void 2897 CC_ESC_WARN(ScanEnv* env, UChar *c) 2844 onig_syntax_warn(ScanEnv *env, const char *fmt, ...) 2845 { 2846 va_list args; 2847 UChar buf[WARN_BUFSIZE]; 2848 va_start(args, fmt); 2849 onig_vsnprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, 2850 env->pattern, env->pattern_end, 2851 (const UChar *)fmt, args); 2852 va_end(args); 2853 #ifdef RUBY 2854 if (env->sourcefile == NULL) 2855 rb_warn("%s", (char *)buf); 2856 else 2857 rb_compile_warn(env->sourcefile, env->sourceline, "%s", (char *)buf); 2858 #else 2859 (*onig_warn)((char* )buf); 2860 #endif 2861 } 2862 2863 static void 2864 CC_ESC_WARN(ScanEnv *env, UChar *c) 2898 2865 { 2899 2866 if (onig_warn == onig_null_warn) return ; … … 2901 2868 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED) && 2902 2869 IS_SYNTAX_BV(env->syntax, ONIG_SYN_BACKSLASH_ESCAPE_IN_CC)) { 2903 UChar buf[WARN_BUFSIZE]; 2904 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, 2905 env->pattern, env->pattern_end, 2906 (UChar* )"character class has '%s' without escape", c); 2907 (*onig_warn)((char* )buf); 2870 onig_syntax_warn(env, "character class has '%s' without escape", c); 2908 2871 } 2909 2872 } … … 2915 2878 2916 2879 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_OP_NOT_ESCAPED)) { 2917 UChar buf[WARN_BUFSIZE]; 2918 onig_snprintf_with_pattern(buf, 
WARN_BUFSIZE, env->enc, 2919 env->pattern, env->pattern_end, 2920 (UChar* )"regular expression has '%s' without escape", c); 2921 (*onig_warn)((char* )buf); 2922 } 2880 onig_syntax_warn(env, "regular expression has '%s' without escape", c); 2881 } 2882 } 2883 2884 #ifndef RTEST 2885 # define RTEST(v) 1 2886 #endif 2887 2888 static void 2889 CC_DUP_WARN(ScanEnv *env) 2890 { 2891 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ; 2892 2893 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_CC_DUP) && 2894 !(env->warnings_flag & ONIG_SYN_WARN_CC_DUP)) { 2895 env->warnings_flag |= ONIG_SYN_WARN_CC_DUP; 2896 onig_syntax_warn(env, "character class has duplicated range"); 2897 } 2898 } 2899 2900 static void 2901 UNKNOWN_ESC_WARN(ScanEnv *env, int c) 2902 { 2903 if (onig_warn == onig_null_warn || !RTEST(ruby_verbose)) return ; 2904 onig_syntax_warn(env, "Unknown escape \\%c is ignored", c); 2923 2905 } 2924 2906 … … 2934 2916 while (p < to) { 2935 2917 x = ONIGENC_MBC_TO_CODE(enc, p, to); 2936 q = p + enclen(enc, p );2918 q = p + enclen(enc, p, to); 2937 2919 if (x == s[0]) { 2938 2920 for (i = 1; i < n && q < to; i++) { 2939 2921 x = ONIGENC_MBC_TO_CODE(enc, q, to); 2940 2922 if (x != s[i]) break; 2941 q += enclen(enc, q );2923 q += enclen(enc, q, to); 2942 2924 } 2943 2925 if (i >= n) { … … 2954 2936 static int 2955 2937 str_exist_check_with_esc(OnigCodePoint s[], int n, UChar* from, UChar* to, 2956 OnigCodePoint bad, OnigEncoding enc, OnigSyntaxType* syn)2938 OnigCodePoint bad, OnigEncoding enc, const OnigSyntaxType* syn) 2957 2939 { 2958 2940 int i, in_esc; … … 2965 2947 if (in_esc) { 2966 2948 in_esc = 0; 2967 p += enclen(enc, p );2949 p += enclen(enc, p, to); 2968 2950 } 2969 2951 else { 2970 2952 x = ONIGENC_MBC_TO_CODE(enc, p, to); 2971 q = p + enclen(enc, p );2953 q = p + enclen(enc, p, to); 2972 2954 if (x == s[0]) { 2973 2955 for (i = 1; i < n && q < to; i++) { 2974 2956 x = ONIGENC_MBC_TO_CODE(enc, q, to); 2975 2957 if (x != s[i]) break; 2976 q += 
enclen(enc, q );2958 q += enclen(enc, q, to); 2977 2959 } 2978 2960 if (i >= n) return 1; 2979 p += enclen(enc, p );2961 p += enclen(enc, p, to); 2980 2962 } 2981 2963 else { … … 2995 2977 int num; 2996 2978 OnigCodePoint c, c2; 2997 OnigSyntaxType* syn = env->syntax;2979 const OnigSyntaxType* syn = env->syntax; 2998 2980 OnigEncoding enc = env->enc; 2999 2981 UChar* prev; … … 3073 3055 case 'p': 3074 3056 case 'P': 3057 if (PEND) break; 3058 3075 3059 c2 = PPEEK; 3076 3060 if (c2 == '{' && … … 3080 3064 tok->u.prop.not = (c == 'P' ? 1 : 0); 3081 3065 3082 if ( IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {3066 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { 3083 3067 PFETCH(c2); 3084 3068 if (c2 == '^') { … … 3089 3073 } 3090 3074 } 3075 else { 3076 onig_syntax_warn(env, "invalid Unicode Property \\%c", c); 3077 } 3091 3078 break; 3092 3079 … … 3100 3087 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 3101 3088 if (!PEND) { 3102 3103 3104 3105 3106 3107 if (p > prev + enclen(enc, prev ) && !PEND && (PPEEK_IS('}'))) {3089 c2 = PPEEK; 3090 if (ONIGENC_IS_CODE_XDIGIT(enc, c2)) 3091 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; 3092 } 3093 3094 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { 3108 3095 PINC; 3109 3096 tok->type = TK_CODE_POINT; … … 3142 3129 tok->base = 16; 3143 3130 tok->u.code = (OnigCodePoint )num; 3131 } 3132 break; 3133 3134 case 'o': 3135 if (PEND) break; 3136 3137 prev = p; 3138 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { 3139 PINC; 3140 num = scan_unsigned_octal_number(&p, end, 11, enc); 3141 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 3142 if (!PEND) { 3143 c2 = PPEEK; 3144 if (ONIGENC_IS_CODE_DIGIT(enc, c2) && c2 < '8') 3145 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; 3146 } 3147 3148 if (p > prev + enclen(enc, prev, end) && !PEND && (PPEEK_IS('}'))) { 3149 PINC; 3150 tok->type = TK_CODE_POINT; 3151 tok->base = 8; 3152 tok->u.code = 
(OnigCodePoint )num; 3153 } 3154 else { 3155 /* can't read nothing or invalid format */ 3156 p = prev; 3157 } 3144 3158 } 3145 3159 break; … … 3163 3177 default: 3164 3178 PUNFETCH; 3165 num = fetch_escaped_value(&p, end, env );3179 num = fetch_escaped_value(&p, end, env, &c2); 3166 3180 if (num < 0) return num; 3167 if ( tok->u.c != num) {3168 tok->u.code = (OnigCodePoint ) num;3181 if ((OnigCodePoint )tok->u.c != c2) { 3182 tok->u.code = (OnigCodePoint )c2; 3169 3183 tok->type = TK_CODE_POINT; 3170 3184 } … … 3215 3229 { 3216 3230 int r, num; 3217 OnigSyntaxType* syn = env->syntax;3231 const OnigSyntaxType* syn = env->syntax; 3218 3232 UChar* prev; 3219 3233 UChar* p = *src; … … 3224 3238 prev = p; 3225 3239 3226 # ifdef USE_BACKREF_WITH_LEVEL3240 # ifdef USE_BACKREF_WITH_LEVEL 3227 3241 name_end = NULL_UCHARP; /* no need. escape gcc warning. */ 3228 3242 r = fetch_name_with_level(c, &p, end, &name_end, … … 3230 3244 if (r == 1) tok->u.backref.exist_level = 1; 3231 3245 else tok->u.backref.exist_level = 0; 3232 # else3246 # else 3233 3247 r = fetch_name(&p, end, &name_end, env, &back_num, 1); 3234 # endif3248 # endif 3235 3249 if (r < 0) return r; 3236 3250 … … 3270 3284 tok->type = TK_BACKREF; 3271 3285 tok->u.backref.by_name = 1; 3272 if (num == 1 ) {3286 if (num == 1 || IS_SYNTAX_BV(syn, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) { 3273 3287 tok->u.backref.num = 1; 3274 3288 tok->u.backref.ref1 = backs[0]; … … 3290 3304 OnigCodePoint c; 3291 3305 OnigEncoding enc = env->enc; 3292 OnigSyntaxType* syn = env->syntax;3306 const OnigSyntaxType* syn = env->syntax; 3293 3307 UChar* prev; 3294 3308 UChar* p = *src; … … 3523 3537 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 3524 3538 if (!PEND) { 3525 3526 3527 3528 3529 if ((p > prev + enclen(enc, prev )) && !PEND && PPEEK_IS('}')) {3539 if (ONIGENC_IS_CODE_XDIGIT(enc, PPEEK)) 3540 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; 3541 } 3542 3543 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { 3530 3544 
PINC; 3531 3545 tok->type = TK_CODE_POINT; … … 3566 3580 break; 3567 3581 3582 case 'o': 3583 if (PEND) break; 3584 3585 prev = p; 3586 if (PPEEK_IS('{') && IS_SYNTAX_OP(syn, ONIG_SYN_OP_ESC_O_BRACE_OCTAL)) { 3587 PINC; 3588 num = scan_unsigned_octal_number(&p, end, 11, enc); 3589 if (num < 0) return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 3590 if (!PEND) { 3591 OnigCodePoint c = PPEEK; 3592 if (ONIGENC_IS_CODE_DIGIT(enc, c) && c < '8') 3593 return ONIGERR_TOO_LONG_WIDE_CHAR_VALUE; 3594 } 3595 3596 if ((p > prev + enclen(enc, prev, end)) && !PEND && PPEEK_IS('}')) { 3597 PINC; 3598 tok->type = TK_CODE_POINT; 3599 tok->u.code = (OnigCodePoint )num; 3600 } 3601 else { 3602 /* can't read nothing or invalid format */ 3603 p = prev; 3604 } 3605 } 3606 break; 3607 3568 3608 case '1': case '2': case '3': case '4': 3569 3609 case '5': case '6': case '7': case '8': case '9': … … 3572 3612 num = onig_scan_unsigned_number(&p, end, enc); 3573 3613 if (num < 0 || num > ONIG_MAX_BACKREF_NUM) { 3574 3614 goto skip_backref; 3575 3615 } 3576 3616 … … 3605 3645 prev = p; 3606 3646 num = scan_unsigned_octal_number(&p, end, (c == '0' ? 2:3), enc); 3607 if (num < 0 ) return ONIGERR_TOO_BIG_NUMBER;3647 if (num < 0 || 0xff < num) return ONIGERR_TOO_BIG_NUMBER; 3608 3648 if (p == prev) { /* can't read nothing. 
*/ 3609 3649 num = 0; /* but, it's not error */ … … 3620 3660 #ifdef USE_NAMED_GROUP 3621 3661 case 'k': 3622 if ( IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) {3662 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_K_NAMED_BACKREF)) { 3623 3663 PFETCH(c); 3624 3664 if (c == '<' || c == '\'') { … … 3626 3666 if (r < 0) return r; 3627 3667 } 3628 else 3668 else { 3629 3669 PUNFETCH; 3670 onig_syntax_warn(env, "invalid back reference"); 3671 } 3630 3672 } 3631 3673 break; … … 3634 3676 #if defined(USE_SUBEXP_CALL) || defined(USE_NAMED_GROUP) 3635 3677 case 'g': 3636 # ifdef USE_NAMED_GROUP3637 if ( IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_BRACE_BACKREF)) {3678 # ifdef USE_NAMED_GROUP 3679 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_BRACE_BACKREF)) { 3638 3680 PFETCH(c); 3639 3681 if (c == '{') { … … 3644 3686 PUNFETCH; 3645 3687 } 3646 # endif3647 # ifdef USE_SUBEXP_CALL3648 if ( IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) {3688 # endif 3689 # ifdef USE_SUBEXP_CALL 3690 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_G_SUBEXP_CALL)) { 3649 3691 PFETCH(c); 3650 3692 if (c == '<' || c == '\'') { … … 3678 3720 tok->u.call.rel = rel; 3679 3721 } 3680 else 3722 else { 3723 onig_syntax_warn(env, "invalid subexp call"); 3681 3724 PUNFETCH; 3682 } 3683 #endif 3725 } 3726 } 3727 # endif 3684 3728 break; 3685 3729 #endif … … 3699 3743 tok->u.prop.not = (c == 'P' ? 
1 : 0); 3700 3744 3701 if ( IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) {3745 if (!PEND && IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_ESC_P_BRACE_CIRCUMFLEX_NOT)) { 3702 3746 PFETCH(c); 3703 3747 if (c == '^') { … … 3708 3752 } 3709 3753 } 3754 else { 3755 onig_syntax_warn(env, "invalid Unicode Property \\%c", c); 3756 } 3710 3757 break; 3711 3758 … … 3729 3776 3730 3777 default: 3731 PUNFETCH; 3732 num = fetch_escaped_value(&p, end, env); 3733 if (num < 0) return num; 3734 /* set_raw: */ 3735 if (tok->u.c != num) { 3736 tok->type = TK_CODE_POINT; 3737 tok->u.code = (OnigCodePoint )num; 3738 } 3739 else { /* string */ 3740 p = tok->backp + enclen(enc, tok->backp); 3778 { 3779 OnigCodePoint c2; 3780 3781 PUNFETCH; 3782 num = fetch_escaped_value(&p, end, env, &c2); 3783 if (num < 0) return num; 3784 /* set_raw: */ 3785 if ((OnigCodePoint )tok->u.c != c2) { 3786 tok->type = TK_CODE_POINT; 3787 tok->u.code = (OnigCodePoint )c2; 3788 } 3789 else { /* string */ 3790 p = tok->backp + enclen(enc, tok->backp, end); 3791 } 3741 3792 } 3742 3793 break; … … 3828 3879 case '(': 3829 3880 if (PPEEK_IS('?') && 3830 3831 3832 3833 3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3881 IS_SYNTAX_OP2(syn, ONIG_SYN_OP2_QMARK_GROUP_EFFECT)) { 3882 PINC; 3883 if (PPEEK_IS('#')) { 3884 PFETCH(c); 3885 while (1) { 3886 if (PEND) return ONIGERR_END_PATTERN_IN_GROUP; 3887 PFETCH(c); 3888 if (c == MC_ESC(syn)) { 3889 if (!PEND) PFETCH(c); 3890 } 3891 else { 3892 if (c == ')') break; 3893 } 3894 } 3895 goto start; 3896 } 3846 3897 #ifdef USE_PERL_SUBEXP_CALL 3847 3898 /* (?&name), (?n), (?R), (?0), (?+n), (?-n) */ … … 3914 3965 3915 3966 PINC; /* skip 'P' */ 3967 if (PEND) return ONIGERR_UNDEFINED_GROUP_OPTION; 3916 3968 PFETCH(c); 3917 3969 if (c == '=') { /* (?P=name): backref */ … … 3932 3984 break; 3933 3985 } 3934 PUNFETCH;3935 3986 } 3936 3987 #endif /* USE_CAPITAL_P_NAMED_GROUP */ 3937 3988 PUNFETCH; 3938 3989 } 3939 3990 … … 4003 4054 static int 4004 4055 
add_ctype_to_cc_by_range(CClassNode* cc, int ctype ARG_UNUSED, int not, 4005 OnigEncoding enc ARG_UNUSED,4056 ScanEnv* env, 4006 4057 OnigCodePoint sb_out, const OnigCodePoint mbr[]) 4007 4058 { … … 4013 4064 if (not == 0) { 4014 4065 for (i = 0; i < n; i++) { 4015 for (j 4016 4066 for (j = ONIGENC_CODE_RANGE_FROM(mbr, i); 4067 j <= ONIGENC_CODE_RANGE_TO(mbr, i); j++) { 4017 4068 if (j >= sb_out) { 4018 4069 if (j > ONIGENC_CODE_RANGE_FROM(mbr, i)) { 4019 r = add_code_range_to_buf(&(cc->mbuf), j,4070 r = add_code_range_to_buf(&(cc->mbuf), env, j, 4020 4071 ONIGENC_CODE_RANGE_TO(mbr, i)); 4021 4072 if (r != 0) return r; … … 4025 4076 goto sb_end; 4026 4077 } 4027 BITSET_SET_BIT(cc->bs, j);4078 BITSET_SET_BIT_CHKDUP(cc->bs, j); 4028 4079 } 4029 4080 } … … 4031 4082 sb_end: 4032 4083 for ( ; i < n; i++) { 4033 r = add_code_range_to_buf(&(cc->mbuf), 4084 r = add_code_range_to_buf(&(cc->mbuf), env, 4034 4085 ONIGENC_CODE_RANGE_FROM(mbr, i), 4035 4086 ONIGENC_CODE_RANGE_TO(mbr, i)); … … 4046 4097 goto sb_end2; 4047 4098 } 4048 BITSET_SET_BIT (cc->bs, j);4099 BITSET_SET_BIT_CHKDUP(cc->bs, j); 4049 4100 } 4050 4101 prev = ONIGENC_CODE_RANGE_TO(mbr, i) + 1; 4051 4102 } 4052 4103 for (j = prev; j < sb_out; j++) { 4053 BITSET_SET_BIT (cc->bs, j);4104 BITSET_SET_BIT_CHKDUP(cc->bs, j); 4054 4105 } 4055 4106 … … 4059 4110 for (i = 0; i < n; i++) { 4060 4111 if (prev < ONIGENC_CODE_RANGE_FROM(mbr, i)) { 4061 r = add_code_range_to_buf(&(cc->mbuf), prev,4112 r = add_code_range_to_buf(&(cc->mbuf), env, prev, 4062 4113 ONIGENC_CODE_RANGE_FROM(mbr, i) - 1); 4063 4114 if (r != 0) return r; … … 4066 4117 } 4067 4118 if (prev < 0x7fffffff) { 4068 r = add_code_range_to_buf(&(cc->mbuf), prev, 0x7fffffff);4119 r = add_code_range_to_buf(&(cc->mbuf), env, prev, 0x7fffffff); 4069 4120 if (r != 0) return r; 4070 4121 } … … 4088 4139 CClassNode ccwork; 4089 4140 initialize_cclass(&ccwork); 4090 r = add_ctype_to_cc_by_range(&ccwork, ctype, not, env ->enc, sb_out,4141 r = 
add_ctype_to_cc_by_range(&ccwork, ctype, not, env, sb_out, 4091 4142 ranges); 4092 4143 if (r == 0) { 4093 4144 if (not) { 4094 r = add_code_range_to_buf (&(ccwork.mbuf), 0x80, ONIG_LAST_CODE_POINT);4145 r = add_code_range_to_buf0(&(ccwork.mbuf), env, 0x80, ONIG_LAST_CODE_POINT, FALSE); 4095 4146 } 4096 4147 else { … … 4098 4149 initialize_cclass(&ccascii); 4099 4150 if (ONIGENC_MBC_MINLEN(env->enc) > 1) { 4100 add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F);4151 r = add_code_range(&(ccascii.mbuf), env, 0x00, 0x7F); 4101 4152 } 4102 4153 else { 4103 bitset_set_range(ccascii.bs, 0x00, 0x7F); 4154 bitset_set_range(env, ccascii.bs, 0x00, 0x7F); 4155 r = 0; 4104 4156 } 4105 r = and_cclass(&ccwork, &ccascii, enc); 4157 if (r == 0) { 4158 r = and_cclass(&ccwork, &ccascii, env); 4159 } 4106 4160 if (IS_NOT_NULL(ccascii.mbuf)) bbuf_free(ccascii.mbuf); 4107 4161 } 4108 4162 if (r == 0) { 4109 r = or_cclass(cc, &ccwork, en c);4163 r = or_cclass(cc, &ccwork, env); 4110 4164 } 4111 4165 if (IS_NOT_NULL(ccwork.mbuf)) bbuf_free(ccwork.mbuf); … … 4113 4167 } 4114 4168 else { 4115 r = add_ctype_to_cc_by_range(cc, ctype, not, env ->enc, sb_out, ranges);4169 r = add_ctype_to_cc_by_range(cc, ctype, not, env, sb_out, ranges); 4116 4170 } 4117 4171 return r; … … 4138 4192 for (c = 0; c < SINGLE_BYTE_SIZE; c++) { 4139 4193 if (! ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) 4140 BITSET_SET_BIT (cc->bs, c);4194 BITSET_SET_BIT_CHKDUP(cc->bs, c); 4141 4195 } 4142 4196 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); … … 4145 4199 for (c = 0; c < SINGLE_BYTE_SIZE; c++) { 4146 4200 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) 4147 BITSET_SET_BIT (cc->bs, c);4201 BITSET_SET_BIT_CHKDUP(cc->bs, c); 4148 4202 } 4149 4203 } … … 4156 4210 if (! 
ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype) 4157 4211 || c >= maxcode) 4158 BITSET_SET_BIT (cc->bs, c);4212 BITSET_SET_BIT_CHKDUP(cc->bs, c); 4159 4213 } 4160 4214 if (ascii_range) 4161 4215 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); 4162 4216 } 4163 4217 else { 4164 4218 for (c = 0; c < maxcode; c++) { 4165 4219 if (ONIGENC_IS_CODE_CTYPE(enc, (OnigCodePoint )c, ctype)) 4166 BITSET_SET_BIT (cc->bs, c);4220 BITSET_SET_BIT_CHKDUP(cc->bs, c); 4167 4221 } 4168 4222 if (! ascii_range) 4169 4223 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); 4170 4224 } 4171 4225 break; … … 4174 4228 if (not == 0) { 4175 4229 for (c = 0; c < maxcode; c++) { 4176 if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT (cc->bs, c);4230 if (ONIGENC_IS_CODE_WORD(enc, c)) BITSET_SET_BIT_CHKDUP(cc->bs, c); 4177 4231 } 4178 4232 if (! ascii_range) 4179 4233 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); 4180 4234 } 4181 4235 else { 4182 4236 for (c = 0; c < SINGLE_BYTE_SIZE; c++) { 4183 4237 if ((ONIGENC_CODE_TO_MBCLEN(enc, c) > 0) /* check invalid code point */ 4184 4238 && (! 
ONIGENC_IS_CODE_WORD(enc, c) || c >= maxcode)) 4185 BITSET_SET_BIT (cc->bs, c);4239 BITSET_SET_BIT_CHKDUP(cc->bs, c); 4186 4240 } 4187 4241 if (ascii_range) 4188 4242 ADD_ALL_MULTI_BYTE_RANGE(enc, cc->mbuf); 4189 4243 } 4190 4244 break; … … 4245 4299 p = (UChar* )onigenc_step(enc, p, end, pb->len); 4246 4300 if (onigenc_with_ascii_strncmp(enc, p, end, (UChar* )":]", 2) != 0) 4247 4301 return ONIGERR_INVALID_POSIX_BRACKET_TYPE; 4248 4302 4249 4303 r = add_ctype_to_cc(cc, pb->ctype, not, ascii_range, env); … … 4276 4330 PFETCH_S(c); 4277 4331 if (c == ']') 4278 4332 return ONIGERR_INVALID_POSIX_BRACKET_TYPE; 4279 4333 } 4280 4334 } … … 4356 4410 static int 4357 4411 next_state_class(CClassNode* cc, CClassNode* asc_cc, 4358 4412 OnigCodePoint* vs, enum CCVALTYPE* type, 4359 4413 enum CCSTATE* state, ScanEnv* env) 4360 4414 { … … 4366 4420 if (*state == CCS_VALUE && *type != CCV_CLASS) { 4367 4421 if (*type == CCV_SB) { 4368 BITSET_SET_BIT (cc->bs, (int )(*vs));4422 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*vs)); 4369 4423 if (IS_NOT_NULL(asc_cc)) 4370 4424 BITSET_SET_BIT(asc_cc->bs, (int )(*vs)); … … 4374 4428 if (r < 0) return r; 4375 4429 if (IS_NOT_NULL(asc_cc)) { 4376 r = add_code_range (&(asc_cc->mbuf), env, *vs, *vs);4430 r = add_code_range0(&(asc_cc->mbuf), env, *vs, *vs, 0); 4377 4431 if (r < 0) return r; 4378 4432 } … … 4387 4441 static int 4388 4442 next_state_val(CClassNode* cc, CClassNode* asc_cc, 4389 OnigCodePoint * vs, OnigCodePoint v,4390 int* vs_israw, int v_israw,4443 OnigCodePoint *from, OnigCodePoint to, 4444 int* from_israw, int to_israw, 4391 4445 enum CCVALTYPE intype, enum CCVALTYPE* type, 4392 4446 enum CCSTATE* state, ScanEnv* env) … … 4397 4451 case CCS_VALUE: 4398 4452 if (*type == CCV_SB) { 4399 BITSET_SET_BIT (cc->bs, (int )(*vs));4453 BITSET_SET_BIT_CHKDUP(cc->bs, (int )(*from)); 4400 4454 if (IS_NOT_NULL(asc_cc)) 4401 BITSET_SET_BIT(asc_cc->bs, (int )(* vs));4455 BITSET_SET_BIT(asc_cc->bs, (int )(*from)); 4402 4456 } 4403 4457 else if 
(*type == CCV_CODE_POINT) { 4404 r = add_code_range(&(cc->mbuf), env, * vs, *vs);4458 r = add_code_range(&(cc->mbuf), env, *from, *from); 4405 4459 if (r < 0) return r; 4406 4460 if (IS_NOT_NULL(asc_cc)) { 4407 r = add_code_range (&(asc_cc->mbuf), env, *vs, *vs);4461 r = add_code_range0(&(asc_cc->mbuf), env, *from, *from, 0); 4408 4462 if (r < 0) return r; 4409 4463 } … … 4414 4468 if (intype == *type) { 4415 4469 if (intype == CCV_SB) { 4416 if (*vs > 0xff || v> 0xff)4417 4418 4419 if (* vs > v) {4470 if (*from > 0xff || to > 0xff) 4471 return ONIGERR_INVALID_CODE_POINT_VALUE; 4472 4473 if (*from > to) { 4420 4474 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) 4421 4475 goto ccs_range_end; … … 4423 4477 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; 4424 4478 } 4425 bitset_set_range( cc->bs, (int )*vs, (int )v);4479 bitset_set_range(env, cc->bs, (int )*from, (int )to); 4426 4480 if (IS_NOT_NULL(asc_cc)) 4427 bitset_set_range( asc_cc->bs, (int )*vs, (int )v);4481 bitset_set_range(env, asc_cc->bs, (int )*from, (int )to); 4428 4482 } 4429 4483 else { 4430 r = add_code_range(&(cc->mbuf), env, * vs, v);4484 r = add_code_range(&(cc->mbuf), env, *from, to); 4431 4485 if (r < 0) return r; 4432 4486 if (IS_NOT_NULL(asc_cc)) { 4433 r = add_code_range (&(asc_cc->mbuf), env, *vs, v);4487 r = add_code_range0(&(asc_cc->mbuf), env, *from, to, 0); 4434 4488 if (r < 0) return r; 4435 4489 } … … 4437 4491 } 4438 4492 else { 4439 #if 0 4440 if (intype == CCV_CODE_POINT && *type == CCV_SB) { 4441 #endif 4442 if (*vs > v) { 4443 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) 4444 goto ccs_range_end; 4445 else 4446 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; 4447 } 4448 bitset_set_range(cc->bs, (int )*vs, (int )(v < 0xff ? 
v : 0xff)); 4449 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*vs, v); 4493 if (*from > to) { 4494 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_ALLOW_EMPTY_RANGE_IN_CC)) 4495 goto ccs_range_end; 4496 else 4497 return ONIGERR_EMPTY_RANGE_IN_CHAR_CLASS; 4498 } 4499 bitset_set_range(env, cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff)); 4500 r = add_code_range(&(cc->mbuf), env, (OnigCodePoint )*from, to); 4501 if (r < 0) return r; 4502 if (IS_NOT_NULL(asc_cc)) { 4503 bitset_set_range(env, asc_cc->bs, (int )*from, (int )(to < 0xff ? to : 0xff)); 4504 r = add_code_range0(&(asc_cc->mbuf), env, (OnigCodePoint )*from, to, 0); 4450 4505 if (r < 0) return r; 4451 if (IS_NOT_NULL(asc_cc)) { 4452 bitset_set_range(asc_cc->bs, (int )*vs, (int )(v < 0xff ? v : 0xff)); 4453 r = add_code_range(&(asc_cc->mbuf), env, (OnigCodePoint )*vs, v); 4454 if (r < 0) return r; 4455 } 4456 #if 0 4457 } 4458 else 4459 return ONIGERR_MISMATCH_CODE_LENGTH_IN_CLASS_RANGE; 4460 #endif 4506 } 4461 4507 } 4462 4508 ccs_range_end: … … 4473 4519 } 4474 4520 4475 * vs_israw = v_israw;4476 * vs = v;4477 *type = intype;4521 *from_israw = to_israw; 4522 *from = to; 4523 *type = intype; 4478 4524 return 0; 4479 4525 } … … 4519 4565 int val_israw, in_israw; 4520 4566 4567 *np = *asc_np = NULL_NODE; 4568 env->parse_depth++; 4569 if (env->parse_depth > ParseDepthLimit) 4570 return ONIGERR_PARSE_DEPTH_LIMIT_OVER; 4521 4571 prev_cc = asc_prev_cc = (CClassNode* )NULL; 4522 *np = *asc_np = NULL_NODE;4523 4572 r = fetch_token_in_cc(tok, src, end, env); 4524 4573 if (r == TK_CHAR && tok->u.c == '^' && tok->escaped == 0) { … … 4602 4651 } 4603 4652 4604 len = enclen(env->enc, buf );4653 len = enclen(env->enc, buf, buf + i); 4605 4654 if (i < len) { 4606 4655 r = ONIGERR_TOO_SHORT_MULTI_BYTE_STRING; … … 4610 4659 p = psave; 4611 4660 for (i = 1; i < len; i++) { 4612 r = fetch_token_in_cc(tok, &p, end, env); 4661 (void)fetch_token_in_cc(tok, &p, end, env); 4662 /* no need to check the retun value (already checked 
above) */ 4613 4663 } 4614 4664 fetched = 0; … … 4710 4760 goto range_end_val; 4711 4761 } 4762 4763 if (val_type == CCV_CLASS) { 4764 r = ONIGERR_UNMATCHED_RANGE_SPECIFIER_IN_CHAR_CLASS; 4765 goto err; 4766 } 4767 4712 4768 state = CCS_RANGE; 4713 4769 } … … 4757 4813 if (r == 0) { 4758 4814 acc = NCCLASS(anode); 4759 r = or_cclass(cc, acc, env ->enc);4815 r = or_cclass(cc, acc, env); 4760 4816 } 4761 4817 if (r == 0 && IS_NOT_NULL(aasc_node)) { 4762 4818 acc = NCCLASS(aasc_node); 4763 r = or_cclass(asc_cc, acc, env ->enc);4819 r = or_cclass(asc_cc, acc, env); 4764 4820 } 4765 4821 onig_node_free(anode); … … 4781 4837 4782 4838 if (IS_NOT_NULL(prev_cc)) { 4783 r = and_cclass(prev_cc, cc, env ->enc);4839 r = and_cclass(prev_cc, cc, env); 4784 4840 if (r != 0) goto err; 4785 4841 bbuf_free(cc->mbuf); 4786 4842 if (IS_NOT_NULL(asc_cc)) { 4787 r = and_cclass(asc_prev_cc, asc_cc, env ->enc);4843 r = and_cclass(asc_prev_cc, asc_cc, env); 4788 4844 if (r != 0) goto err; 4789 4845 bbuf_free(asc_cc->mbuf); … … 4829 4885 4830 4886 if (IS_NOT_NULL(prev_cc)) { 4831 r = and_cclass(prev_cc, cc, env ->enc);4887 r = and_cclass(prev_cc, cc, env); 4832 4888 if (r != 0) goto err; 4833 4889 bbuf_free(cc->mbuf); 4834 4890 cc = prev_cc; 4835 4891 if (IS_NOT_NULL(asc_cc)) { 4836 r = and_cclass(asc_prev_cc, asc_cc, env ->enc);4892 r = and_cclass(asc_prev_cc, asc_cc, env); 4837 4893 if (r != 0) goto err; 4838 4894 bbuf_free(asc_cc->mbuf); … … 4863 4919 4864 4920 if (ONIGENC_IS_CODE_NEWLINE(env->enc, NEWLINE_CODE)) { 4865 4866 BITSET_SET_BIT(cc->bs, NEWLINE_CODE);4867 4868 4869 4870 4921 if (ONIGENC_CODE_TO_MBCLEN(env->enc, NEWLINE_CODE) == 1) 4922 BITSET_SET_BIT_CHKDUP(cc->bs, NEWLINE_CODE); 4923 else { 4924 r = add_code_range(&(cc->mbuf), env, NEWLINE_CODE, NEWLINE_CODE); 4925 if (r < 0) goto err; 4926 } 4871 4927 } 4872 4928 } 4873 4929 } 4874 4930 *src = p; 4931 env->parse_depth--; 4875 4932 return 0; 4876 4933 … … 4933 4990 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); 4934 4991 
break; 4992 case '~': /* (?~...) absent operator */ 4993 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_TILDE_ABSENT)) { 4994 *np = node_new_enclose(ENCLOSE_ABSENT); 4995 } 4996 else { 4997 return ONIGERR_UNDEFINED_GROUP_OPTION; 4998 } 4999 break; 4935 5000 4936 5001 #ifdef USE_NAMED_GROUP … … 4943 5008 break; 4944 5009 4945 # ifdef USE_CAPITAL_P_NAMED_GROUP5010 # ifdef USE_CAPITAL_P_NAMED_GROUP 4946 5011 case 'P': /* (?P<name>...) */ 4947 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { 5012 if (!PEND && 5013 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_CAPITAL_P_NAMED_GROUP)) { 4948 5014 PFETCH(c); 4949 5015 if (c == '<') goto named_group1; … … 4951 5017 return ONIGERR_UNDEFINED_GROUP_OPTION; 4952 5018 break; 5019 # endif 4953 5020 #endif 4954 #endif4955 5021 4956 5022 case '<': /* look behind (?<=...), (?<!...) */ 5023 if (PEND) return ONIGERR_END_PATTERN_WITH_UNMATCHED_PARENTHESIS; 4957 5024 PFETCH(c); 4958 5025 if (c == '=') … … 4972 5039 list_capture = 0; 4973 5040 5041 # ifdef USE_CAPTURE_HISTORY 4974 5042 named_group2: 5043 # endif 4975 5044 name = p; 4976 5045 r = fetch_name((OnigCodePoint )c, &p, end, &name_end, env, &num, 0); … … 5002 5071 break; 5003 5072 5073 #ifdef USE_CAPTURE_HISTORY 5004 5074 case '@': 5005 5075 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_ATMARK_CAPTURE_HISTORY)) { 5006 #ifdef USE_NAMED_GROUP 5007 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { 5076 # ifdef USE_NAMED_GROUP 5077 if (!PEND && 5078 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LT_NAMED_GROUP)) { 5008 5079 PFETCH(c); 5009 5080 if (c == '<' || c == '\'') { … … 5013 5084 PUNFETCH; 5014 5085 } 5015 # endif5086 # endif 5016 5087 *np = node_new_enclose_memory(env->option, 0); 5017 5088 CHECK_NULL_RETURN_MEMERR(*np); … … 5028 5099 } 5029 5100 break; 5101 #endif /* USE_CAPTURE_HISTORY */ 5030 5102 5031 5103 case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */ 5032 if (IS_SYNTAX_OP2(env->syntax, 
ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) { 5104 if (!PEND && 5105 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_QMARK_LPAREN_CONDITION)) { 5033 5106 UChar *name = NULL; 5034 5107 UChar *name_end; … … 5048 5121 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)) { 5049 5122 if (num > env->num_mem || 5050 5123 IS_NULL(SCANENV_MEM_NODES(env)[num])) 5051 5124 return ONIGERR_INVALID_BACKREF; 5052 5125 } … … 5054 5127 #ifdef USE_NAMED_GROUP 5055 5128 else if (c == '<' || c == '\'') { /* (<name>), ('name') */ 5056 int nums;5057 int *backs;5058 5059 5129 name = p; 5060 r = fetch_name ((OnigCodePoint )c, &p, end, &name_end, env, &num, 0);5130 r = fetch_named_backref_token(c, tok, &p, end, env); 5061 5131 if (r < 0) return r; 5062 PFETCH(c); 5063 if (c != ')') return ONIGERR_UNDEFINED_GROUP_OPTION; 5064 5065 nums = onig_name_to_group_numbers(env->reg, name, name_end, &backs); 5066 if (nums <= 0) { 5067 onig_scan_env_set_error_string(env, 5068 ONIGERR_UNDEFINED_NAME_REFERENCE, name, name_end); 5069 return ONIGERR_UNDEFINED_NAME_REFERENCE; 5132 if (!PPEEK_IS(')')) return ONIGERR_UNDEFINED_GROUP_OPTION; 5133 PINC; 5134 5135 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_USE_LEFT_MOST_NAMED_GROUP)) { 5136 num = tok->u.backref.ref1; 5070 5137 } 5071 if (IS_SYNTAX_BV(env->syntax, ONIG_SYN_STRICT_CHECK_BACKREF)){5072 int i;5073 for (i = 0; i < nums; i++) {5074 if (backs[i] > env->num_mem ||5075 IS_NULL(SCANENV_MEM_NODES(env)[backs[i]]))5076 return ONIGERR_INVALID_BACKREF;5077 }5138 else { 5139 /* FIXME: 5140 * Use left most named group for now. This is the same as Perl. 5141 * However this should use the same strategy as normal back- 5142 * references on Ruby syntax; search right to left. */ 5143 int len = tok->u.backref.num; 5144 num = len > 1 ? 
tok->u.backref.refs[0] : tok->u.backref.ref1; 5078 5145 } 5079 num = backs[0]; /* XXX: use left most named group as Perl */5080 5146 } 5081 5147 #endif … … 5102 5168 5103 5169 case '^': /* loads default options */ 5104 if ( IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) {5170 if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL)) { 5105 5171 /* d-imsx */ 5106 5172 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); … … 5112 5178 } 5113 5179 #if 0 5114 else if ( IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) {5180 else if (!PEND && IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) { 5115 5181 /* d-imx */ 5116 5182 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); … … 5170 5236 case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */ 5171 5237 if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) || 5172 5173 5238 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) && 5239 (neg == 0)) { 5174 5240 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); 5175 5241 ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1); … … 5182 5248 case 'u': 5183 5249 if ((IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) || 5184 5185 5250 IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY)) && 5251 (neg == 0)) { 5186 5252 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); 5187 5253 ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 1); … … 5194 5260 case 'd': 5195 5261 if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_PERL) && 5196 5262 (neg == 0)) { 5197 5263 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 1); 5198 5264 } 5199 5265 else if (IS_SYNTAX_OP2(env->syntax, ONIG_SYN_OP2_OPTION_RUBY) && 5200 5266 (neg == 0)) { 5201 5267 ONOFF(option, ONIG_OPTION_ASCII_RANGE, 0); 5202 5268 ONOFF(option, ONIG_OPTION_POSIX_BRACKET_ALL_RANGE, 0); … … 5228 5294 OnigOptionType prev = env->option; 5229 5295 5230 env->option 5296 env->option = option; 5231 5297 r = fetch_token(tok, &p, end, env); 5232 if (r < 0) return r; 5298 if (r < 0) { 5299 env->option = prev; 5300 return r; 5301 } 5233 
5302 r = parse_subexp(&target, tok, term, &p, end, env); 5234 5303 env->option = prev; … … 5345 5414 5346 5415 #ifdef USE_WARNING_REDUNDANT_NESTED_REPEAT_OPERATOR 5347 if ( !IS_QUANTIFIER_BY_NUMBER(qn) && !IS_QUANTIFIER_BY_NUMBER(qnt)&&5416 if (nestq_num >= 0 && targetq_num >= 0 && 5348 5417 IS_SYNTAX_BV(env->syntax, ONIG_SYN_WARN_REDUNDANT_NESTED_REPEAT)) { 5349 UChar buf[WARN_BUFSIZE]; 5350 5351 switch (ReduceTypeTable[targetq_num][nestq_num]) { 5352 case RQ_ASIS: 5353 break; 5354 5355 case RQ_DEL: 5356 if (onig_verb_warn != onig_null_warn) { 5357 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, 5358 env->pattern, env->pattern_end, 5359 (UChar* )"redundant nested repeat operator"); 5360 (*onig_verb_warn)((char* )buf); 5361 } 5362 goto warn_exit; 5363 break; 5364 5365 default: 5366 if (onig_verb_warn != onig_null_warn) { 5367 onig_snprintf_with_pattern(buf, WARN_BUFSIZE, env->enc, 5368 env->pattern, env->pattern_end, 5369 (UChar* )"nested repeat operator %s and %s was replaced with '%s'", 5370 PopularQStr[targetq_num], PopularQStr[nestq_num], 5371 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); 5372 (*onig_verb_warn)((char* )buf); 5373 } 5374 goto warn_exit; 5375 break; 5376 } 5418 switch (ReduceTypeTable[targetq_num][nestq_num]) { 5419 case RQ_ASIS: 5420 break; 5421 5422 case RQ_DEL: 5423 if (onig_warn != onig_null_warn) { 5424 onig_syntax_warn(env, "regular expression has redundant nested repeat operator '%s'", 5425 PopularQStr[targetq_num]); 5426 } 5427 goto warn_exit; 5428 break; 5429 5430 default: 5431 if (onig_warn != onig_null_warn) { 5432 onig_syntax_warn(env, "nested repeat operator '%s' and '%s' was replaced with '%s' in regular expression", 5433 PopularQStr[targetq_num], PopularQStr[nestq_num], 5434 ReduceQStr[ReduceTypeTable[targetq_num][nestq_num]]); 5435 } 5436 goto warn_exit; 5437 break; 5438 } 5377 5439 } 5378 5440 … … 5402 5464 return 0; 5403 5465 } 5404 5405 5406 #ifdef USE_SHARED_CCLASS_TABLE5407 5408 #define 
THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS 85409 5410 /* for ctype node hash table */5411 5412 typedef struct {5413 OnigEncoding enc;5414 int not;5415 int type;5416 } type_cclass_key;5417 5418 static int type_cclass_cmp(type_cclass_key* x, type_cclass_key* y)5419 {5420 if (x->type != y->type) return 1;5421 if (x->enc != y->enc) return 1;5422 if (x->not != y->not) return 1;5423 return 0;5424 }5425 5426 static int type_cclass_hash(type_cclass_key* key)5427 {5428 int i, val;5429 UChar *p;5430 5431 val = 0;5432 5433 p = (UChar* )&(key->enc);5434 for (i = 0; i < (int )sizeof(key->enc); i++) {5435 val = val * 997 + (int )*p++;5436 }5437 5438 p = (UChar* )(&key->type);5439 for (i = 0; i < (int )sizeof(key->type); i++) {5440 val = val * 997 + (int )*p++;5441 }5442 5443 val += key->not;5444 return val + (val >> 5);5445 }5446 5447 static struct st_hash_type type_type_cclass_hash = {5448 type_cclass_cmp,5449 type_cclass_hash,5450 };5451 5452 static st_table* OnigTypeCClassTable;5453 5454 5455 static int5456 i_free_shared_class(type_cclass_key* key, Node* node, void* arg ARG_UNUSED)5457 {5458 if (IS_NOT_NULL(node)) {5459 CClassNode* cc = NCCLASS(node);5460 if (IS_NOT_NULL(cc->mbuf)) xfree(cc->mbuf);5461 xfree(node);5462 }5463 5464 if (IS_NOT_NULL(key)) xfree(key);5465 return ST_DELETE;5466 }5467 5468 extern int5469 onig_free_shared_cclass_table(void)5470 {5471 /* THREAD_ATOMIC_START; */5472 if (IS_NOT_NULL(OnigTypeCClassTable)) {5473 onig_st_foreach(OnigTypeCClassTable, i_free_shared_class, 0);5474 onig_st_free_table(OnigTypeCClassTable);5475 OnigTypeCClassTable = NULL;5476 }5477 /* THREAD_ATOMIC_END; */5478 5479 return 0;5480 }5481 5482 #endif /* USE_SHARED_CCLASS_TABLE */5483 5466 5484 5467 … … 5525 5508 CClassNode* asc_cc; 5526 5509 BitSetRef bs; 5527 int add_flag ;5510 int add_flag, r; 5528 5511 5529 5512 iarg = (IApplyCaseFoldArg* )arg; … … 5552 5535 if (add_flag) { 5553 5536 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { 5554 add_code_range(&(cc->mbuf), 
env, *to, *to); 5537 r = add_code_range0(&(cc->mbuf), env, *to, *to, 0); 5538 if (r < 0) return r; 5555 5539 } 5556 5540 else { … … 5564 5548 if (ONIGENC_MBC_MINLEN(env->enc) > 1 || *to >= SINGLE_BYTE_SIZE) { 5565 5549 if (IS_NCCLASS_NOT(cc)) clear_not_flag_cclass(cc, env->enc); 5566 add_code_range(&(cc->mbuf), env, *to, *to); 5550 r = add_code_range0(&(cc->mbuf), env, *to, *to, 0); 5551 if (r < 0) return r; 5567 5552 } 5568 5553 else { … … 5654 5639 Node* target2 = NULL; 5655 5640 CClassNode* cc; 5656 int num1, num2 ;5641 int num1, num2, r; 5657 5642 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2]; 5658 5643 … … 5670 5655 cc = NCCLASS(right); 5671 5656 if (ONIGENC_MBC_MINLEN(env->enc) > 1) { 5672 add_code_range(&(cc->mbuf), env, 0x0A, 0x0D); 5657 r = add_code_range(&(cc->mbuf), env, 0x0A, 0x0D); 5658 if (r != 0) goto err; 5673 5659 } 5674 5660 else { 5675 bitset_set_range( cc->bs, 0x0A, 0x0D);5661 bitset_set_range(env, cc->bs, 0x0A, 0x0D); 5676 5662 } 5677 5663 … … 5679 5665 if (ONIGENC_IS_UNICODE(env->enc)) { 5680 5666 /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */ 5681 add_code_range(&(cc->mbuf), env, 0x85, 0x85); 5682 add_code_range(&(cc->mbuf), env, 0x2028, 0x2029); 5667 r = add_code_range(&(cc->mbuf), env, 0x85, 0x85); 5668 if (r != 0) goto err; 5669 r = add_code_range(&(cc->mbuf), env, 0x2028, 0x2029); 5670 if (r != 0) goto err; 5683 5671 } 5684 5672 … … 5707 5695 5708 5696 static int 5697 propname2ctype(ScanEnv* env, const char* propname) 5698 { 5699 UChar* name = (UChar* )propname; 5700 int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII, 5701 name, name + strlen(propname)); 5702 return ctype; 5703 } 5704 5705 static int 5709 5706 node_extended_grapheme_cluster(Node** np, ScanEnv* env) 5710 5707 { 5711 /* same as (?>\P{M}\p{M}*) */5708 Node* tmp = NULL; 5712 5709 Node* np1 = NULL; 5713 Node* np2 = NULL; 5714 Node* qn = NULL; 5715 Node* list1 = NULL; 5710 Node* list = NULL; 5716 5711 Node* list2 = NULL; 5712 Node* alt = NULL; 5713 Node* alt2 = NULL; 5714 BBuf 
*pbuf1 = NULL; 5717 5715 int r = 0; 5716 int num1; 5717 UChar buf[ONIGENC_CODE_TO_MBC_MAXLEN * 2]; 5718 OnigOptionType option; 5718 5719 5719 5720 #ifdef USE_UNICODE_PROPERTIES 5720 5721 if (ONIGENC_IS_UNICODE(env->enc)) { 5721 5722 /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */ 5722 CClassNode* cc1; 5723 CClassNode* cc2; 5724 UChar* propname = (UChar* )"M"; 5725 int ctype = env->enc->property_name_to_ctype(ONIG_ENCODING_ASCII, 5726 propname, propname + 1); 5727 if (ctype >= 0) { 5728 /* \P{M} */ 5729 np1 = node_new_cclass(); 5730 if (IS_NULL(np1)) goto err; 5731 cc1 = NCCLASS(np1); 5732 r = add_ctype_to_cc(cc1, ctype, 0, 0, env); 5723 CClassNode* cc; 5724 OnigCodePoint sb_out = (ONIGENC_MBC_MINLEN(env->enc) > 1) ? 0x00 : 0x80; 5725 int extend = propname2ctype(env, "Grapheme_Cluster_Break=Extend"); 5726 5727 /* Prepend* 5728 * ( RI-sequence | Hangul-Syllable | !Control ) 5729 * ( Grapheme_Extend | SpacingMark )* */ 5730 5731 /* ( Grapheme_Extend | SpacingMark )* */ 5732 np1 = node_new_cclass(); 5733 if (IS_NULL(np1)) goto err; 5734 cc = NCCLASS(np1); 5735 r = add_ctype_to_cc(cc, extend, 0, 0, env); 5736 if (r != 0) goto err; 5737 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=SpacingMark"), 0, 0, env); 5738 if (r != 0) goto err; 5739 r = add_code_range(&(cc->mbuf), env, 0x200D, 0x200D); 5740 if (r != 0) goto err; 5741 5742 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5743 if (IS_NULL(tmp)) goto err; 5744 NQTFR(tmp)->target = np1; 5745 np1 = tmp; 5746 5747 tmp = node_new_list(np1, NULL_NODE); 5748 if (IS_NULL(tmp)) goto err; 5749 list = tmp; 5750 np1 = NULL; 5751 5752 /* ( RI-sequence | Hangul-Syllable | !Control ) */ 5753 /* !Control */ 5754 np1 = node_new_cclass(); 5755 if (IS_NULL(np1)) goto err; 5756 cc = NCCLASS(np1); 5757 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Control"), 1, 0, env); 5758 if (r != 0) goto err; 5759 if (ONIGENC_MBC_MINLEN(env->enc) > 1) { 5760 BBuf *pbuf2 = NULL; 5761 r = add_code_range(&pbuf1, env, 
0x0a, 0x0a); 5733 5762 if (r != 0) goto err; 5734 NCCLASS_SET_NOT(cc1); 5735 5736 /* \p{M}* */ 5737 np2 = node_new_cclass(); 5738 if (IS_NULL(np2)) goto err; 5739 cc2 = NCCLASS(np2); 5740 r = add_ctype_to_cc(cc2, ctype, 0, 0, env); 5763 r = add_code_range(&pbuf1, env, 0x0d, 0x0d); 5741 5764 if (r != 0) goto err; 5742 5743 qn = node_new_quantifier(0, REPEAT_INFINITE, 0); 5744 if (IS_NULL(qn)) goto err; 5745 NQTFR(qn)->target = np2; 5746 np2 = NULL; 5747 5748 /* \P{M}\p{M}* */ 5749 list2 = node_new_list(qn, NULL_NODE); 5750 if (IS_NULL(list2)) goto err; 5751 qn = NULL; 5752 list1 = node_new_list(np1, list2); 5753 if (IS_NULL(list1)) goto err; 5754 np1 = NULL; 5755 list2 = NULL; 5756 5757 /* (?>...) */ 5758 *np = node_new_enclose(ENCLOSE_STOP_BACKTRACK); 5759 if (IS_NULL(*np)) goto err; 5760 NENCLOSE(*np)->target = list1; 5761 return ONIG_NORMAL; 5762 } 5763 } 5764 #endif /* USE_UNICODE_PROPERTIES */ 5765 if (IS_NULL(*np)) { 5765 r = and_code_range_buf(cc->mbuf, 0, pbuf1, 1, &pbuf2, env); 5766 if (r != 0) { 5767 bbuf_free(pbuf2); 5768 goto err; 5769 } 5770 bbuf_free(pbuf1); 5771 pbuf1 = NULL; 5772 bbuf_free(cc->mbuf); 5773 cc->mbuf = pbuf2; 5774 } 5775 else { 5776 BITSET_CLEAR_BIT(cc->bs, 0x0a); 5777 BITSET_CLEAR_BIT(cc->bs, 0x0d); 5778 } 5779 5780 tmp = onig_node_new_alt(np1, NULL_NODE); 5781 if (IS_NULL(tmp)) goto err; 5782 alt = tmp; 5783 np1 = NULL; 5784 5785 /* Hangul-Syllable 5786 * := L* V+ T* 5787 * | L* LV V* T* 5788 * | L* LVT T* 5789 * | L+ 5790 * | T+ */ 5791 5792 /* T+ */ 5793 np1 = node_new_cclass(); 5794 if (IS_NULL(np1)) goto err; 5795 cc = NCCLASS(np1); 5796 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env); 5797 if (r != 0) goto err; 5798 5799 tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); 5800 if (IS_NULL(tmp)) goto err; 5801 NQTFR(tmp)->target = np1; 5802 np1 = tmp; 5803 5804 tmp = onig_node_new_alt(np1, alt); 5805 if (IS_NULL(tmp)) goto err; 5806 alt = tmp; 5807 np1 = NULL; 5808 5809 /* L+ */ 5810 np1 = 
node_new_cclass(); 5811 if (IS_NULL(np1)) goto err; 5812 cc = NCCLASS(np1); 5813 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); 5814 if (r != 0) goto err; 5815 5816 tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); 5817 if (IS_NULL(tmp)) goto err; 5818 NQTFR(tmp)->target = np1; 5819 np1 = tmp; 5820 5821 tmp = onig_node_new_alt(np1, alt); 5822 if (IS_NULL(tmp)) goto err; 5823 alt = tmp; 5824 np1 = NULL; 5825 5826 /* L* LVT T* */ 5827 np1 = node_new_cclass(); 5828 if (IS_NULL(np1)) goto err; 5829 cc = NCCLASS(np1); 5830 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env); 5831 if (r != 0) goto err; 5832 5833 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5834 if (IS_NULL(tmp)) goto err; 5835 NQTFR(tmp)->target = np1; 5836 np1 = tmp; 5837 5838 tmp = node_new_list(np1, NULL_NODE); 5839 if (IS_NULL(tmp)) goto err; 5840 list2 = tmp; 5841 np1 = NULL; 5842 5843 np1 = node_new_cclass(); 5844 if (IS_NULL(np1)) goto err; 5845 cc = NCCLASS(np1); 5846 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=LVT"), 0, 0, env); 5847 if (r != 0) goto err; 5848 5849 tmp = node_new_list(np1, list2); 5850 if (IS_NULL(tmp)) goto err; 5851 list2 = tmp; 5852 np1 = NULL; 5853 5854 np1 = node_new_cclass(); 5855 if (IS_NULL(np1)) goto err; 5856 cc = NCCLASS(np1); 5857 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); 5858 if (r != 0) goto err; 5859 5860 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5861 if (IS_NULL(tmp)) goto err; 5862 NQTFR(tmp)->target = np1; 5863 np1 = tmp; 5864 5865 tmp = node_new_list(np1, list2); 5866 if (IS_NULL(tmp)) goto err; 5867 list2 = tmp; 5868 np1 = NULL; 5869 5870 tmp = onig_node_new_alt(list2, alt); 5871 if (IS_NULL(tmp)) goto err; 5872 alt = tmp; 5873 list2 = NULL; 5874 5875 /* L* LV V* T* */ 5876 np1 = node_new_cclass(); 5877 if (IS_NULL(np1)) goto err; 5878 cc = NCCLASS(np1); 5879 r = add_ctype_to_cc(cc, propname2ctype(env, 
"Grapheme_Cluster_Break=T"), 0, 0, env); 5880 if (r != 0) goto err; 5881 5882 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5883 if (IS_NULL(tmp)) goto err; 5884 NQTFR(tmp)->target = np1; 5885 np1 = tmp; 5886 5887 tmp = node_new_list(np1, NULL_NODE); 5888 if (IS_NULL(tmp)) goto err; 5889 list2 = tmp; 5890 np1 = NULL; 5891 5892 np1 = node_new_cclass(); 5893 if (IS_NULL(np1)) goto err; 5894 cc = NCCLASS(np1); 5895 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=V"), 0, 0, env); 5896 if (r != 0) goto err; 5897 5898 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5899 if (IS_NULL(tmp)) goto err; 5900 NQTFR(tmp)->target = np1; 5901 np1 = tmp; 5902 5903 tmp = node_new_list(np1, list2); 5904 if (IS_NULL(tmp)) goto err; 5905 list2 = tmp; 5906 np1 = NULL; 5907 5908 np1 = node_new_cclass(); 5909 if (IS_NULL(np1)) goto err; 5910 cc = NCCLASS(np1); 5911 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=LV"), 0, 0, env); 5912 if (r != 0) goto err; 5913 5914 tmp = node_new_list(np1, list2); 5915 if (IS_NULL(tmp)) goto err; 5916 list2 = tmp; 5917 np1 = NULL; 5918 5919 np1 = node_new_cclass(); 5920 if (IS_NULL(np1)) goto err; 5921 cc = NCCLASS(np1); 5922 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); 5923 if (r != 0) goto err; 5924 5925 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5926 if (IS_NULL(tmp)) goto err; 5927 NQTFR(tmp)->target = np1; 5928 np1 = tmp; 5929 5930 tmp = node_new_list(np1, list2); 5931 if (IS_NULL(tmp)) goto err; 5932 list2 = tmp; 5933 np1 = NULL; 5934 5935 tmp = onig_node_new_alt(list2, alt); 5936 if (IS_NULL(tmp)) goto err; 5937 alt = tmp; 5938 list2 = NULL; 5939 5940 /* L* V+ T* */ 5941 np1 = node_new_cclass(); 5942 if (IS_NULL(np1)) goto err; 5943 cc = NCCLASS(np1); 5944 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=T"), 0, 0, env); 5945 if (r != 0) goto err; 5946 5947 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5948 if (IS_NULL(tmp)) goto 
err; 5949 NQTFR(tmp)->target = np1; 5950 np1 = tmp; 5951 5952 tmp = node_new_list(np1, NULL_NODE); 5953 if (IS_NULL(tmp)) goto err; 5954 list2 = tmp; 5955 np1 = NULL; 5956 5957 np1 = node_new_cclass(); 5958 if (IS_NULL(np1)) goto err; 5959 cc = NCCLASS(np1); 5960 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=V"), 0, 0, env); 5961 if (r != 0) goto err; 5962 5963 tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); 5964 if (IS_NULL(tmp)) goto err; 5965 NQTFR(tmp)->target = np1; 5966 np1 = tmp; 5967 5968 tmp = node_new_list(np1, list2); 5969 if (IS_NULL(tmp)) goto err; 5970 list2 = tmp; 5971 np1 = NULL; 5972 5973 np1 = node_new_cclass(); 5974 if (IS_NULL(np1)) goto err; 5975 cc = NCCLASS(np1); 5976 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=L"), 0, 0, env); 5977 if (r != 0) goto err; 5978 5979 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 5980 if (IS_NULL(tmp)) goto err; 5981 NQTFR(tmp)->target = np1; 5982 np1 = tmp; 5983 5984 tmp = node_new_list(np1, list2); 5985 if (IS_NULL(tmp)) goto err; 5986 list2 = tmp; 5987 np1 = NULL; 5988 5989 tmp = onig_node_new_alt(list2, alt); 5990 if (IS_NULL(tmp)) goto err; 5991 alt = tmp; 5992 list2 = NULL; 5993 5994 /* Emoji sequence := (E_Base | EBG) Extend* E_Modifier? 5995 * (ZWJ (Glue_After_Zwj | EBG Extend* E_Modifier?) )* */ 5996 5997 /* ZWJ (Glue_After_Zwj | E_Base_GAZ Extend* E_Modifier?) 
*/ 5998 np1 = node_new_cclass(); 5999 if (IS_NULL(np1)) goto err; 6000 cc = NCCLASS(np1); 6001 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env); 6002 if (r != 0) goto err; 6003 6004 tmp = node_new_quantifier(0, 1, 0); 6005 if (IS_NULL(tmp)) goto err; 6006 NQTFR(tmp)->target = np1; 6007 np1 = tmp; 6008 6009 tmp = node_new_list(np1, NULL_NODE); 6010 if (IS_NULL(tmp)) goto err; 6011 list2 = tmp; 6012 np1 = NULL; 6013 6014 np1 = node_new_cclass(); 6015 if (IS_NULL(np1)) goto err; 6016 cc = NCCLASS(np1); 6017 r = add_ctype_to_cc(cc, extend, 0, 0, env); 6018 if (r != 0) goto err; 6019 6020 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 6021 if (IS_NULL(tmp)) goto err; 6022 NQTFR(tmp)->target = np1; 6023 np1 = tmp; 6024 6025 tmp = node_new_list(np1, list2); 6026 if (IS_NULL(tmp)) goto err; 6027 list2 = tmp; 6028 np1 = NULL; 6029 6030 np1 = node_new_cclass(); 6031 if (IS_NULL(np1)) goto err; 6032 cc = NCCLASS(np1); 6033 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env); 6034 if (r != 0) goto err; 6035 6036 tmp = node_new_list(np1, list2); 6037 if (IS_NULL(tmp)) goto err; 6038 list2 = tmp; 6039 np1 = NULL; 6040 6041 tmp = onig_node_new_alt(list2, NULL_NODE); 6042 if (IS_NULL(tmp)) goto err; 6043 alt2 = tmp; 6044 list2 = NULL; 6045 6046 /* Glue_After_Zwj */ 6047 np1 = node_new_cclass(); 6048 if (IS_NULL(np1)) goto err; 6049 cc = NCCLASS(np1); 6050 r = add_ctype_to_cc(cc, extend, 0, 0, env); 6051 if (r != 0) goto err; 6052 6053 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 6054 if (IS_NULL(tmp)) goto err; 6055 NQTFR(tmp)->target = np1; 6056 np1 = tmp; 6057 6058 tmp = node_new_list(np1, NULL_NODE); 6059 if (IS_NULL(tmp)) goto err; 6060 list2 = tmp; 6061 np1 = NULL; 6062 6063 np1 = node_new_cclass(); 6064 if (IS_NULL(np1)) goto err; 6065 cc = NCCLASS(np1); 6066 { 6067 static const OnigCodePoint ranges[] = { 6068 13, 6069 0x1F308, 0x1F308, 6070 0x1F33E, 0x1F33E, 6071 0x1F373, 0x1F373, 
6072 0x1F393, 0x1F393, 6073 0x1F3A4, 0x1F3A4, 6074 0x1F3A8, 0x1F3A8, 6075 0x1F3EB, 0x1F3EB, 6076 0x1F3ED, 0x1F3ED, 6077 0x1F4BB, 0x1F4BC, 6078 0x1F527, 0x1F527, 6079 0x1F52C, 0x1F52C, 6080 0x1F680, 0x1F680, 6081 0x1F692, 0x1F692, 6082 }; 6083 r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges); 6084 if (r != 0) goto err; 6085 } 6086 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Glue_After_Zwj"), 0, 0, env); 6087 if (r != 0) goto err; 6088 6089 tmp = node_new_list(np1, list2); 6090 if (IS_NULL(tmp)) goto err; 6091 list2 = tmp; 6092 np1 = NULL; 6093 6094 tmp = onig_node_new_alt(list2, alt2); 6095 if (IS_NULL(tmp)) goto err; 6096 alt2 = tmp; 6097 list2 = NULL; 6098 6099 /* Emoji variation sequence 6100 * http://unicode.org/Public/emoji/4.0/emoji-zwj-sequences.txt 6101 */ 6102 r = ONIGENC_CODE_TO_MBC(env->enc, 0xfe0f, buf); 6103 if (r < 0) goto err; 6104 np1 = node_new_str_raw(buf, buf + r); 6105 if (IS_NULL(np1)) goto err; 6106 6107 tmp = node_new_quantifier(0, 1, 0); 6108 if (IS_NULL(tmp)) goto err; 6109 NQTFR(tmp)->target = np1; 6110 np1 = tmp; 6111 6112 tmp = node_new_list(np1, NULL_NODE); 6113 if (IS_NULL(tmp)) goto err; 6114 list2 = tmp; 6115 np1 = NULL; 6116 6117 np1 = node_new_cclass(); 6118 if (IS_NULL(np1)) goto err; 6119 cc = NCCLASS(np1); 6120 { 6121 static const OnigCodePoint ranges[] = { 6122 4, 6123 0x2640, 0x2640, 6124 0x2642, 0x2642, 6125 0x2695, 0x2696, 6126 0x2708, 0x2708, 6127 }; 6128 r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges); 6129 if (r != 0) goto err; 6130 } 6131 6132 tmp = node_new_list(np1, list2); 6133 if (IS_NULL(tmp)) goto err; 6134 list2 = tmp; 6135 np1 = NULL; 6136 6137 tmp = onig_node_new_alt(list2, alt2); 6138 if (IS_NULL(tmp)) goto err; 6139 alt2 = tmp; 6140 list2 = NULL; 6141 6142 tmp = node_new_list(alt2, NULL_NODE); 6143 if (IS_NULL(tmp)) goto err; 6144 list2 = tmp; 6145 alt2 = NULL; 6146 6147 /* ZWJ */ 6148 r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf); 6149 if (r < 0) goto err; 6150 
np1 = node_new_str_raw(buf, buf + r); 6151 if (IS_NULL(np1)) goto err; 6152 6153 tmp = node_new_list(np1, list2); 6154 if (IS_NULL(tmp)) goto err; 6155 list2 = tmp; 6156 np1 = NULL; 6157 6158 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 6159 if (IS_NULL(tmp)) goto err; 6160 NQTFR(tmp)->target = list2; 6161 np1 = tmp; 6162 list2 = NULL; 6163 6164 tmp = node_new_list(np1, NULL_NODE); 6165 if (IS_NULL(tmp)) goto err; 6166 list2 = tmp; 6167 np1 = NULL; 6168 6169 /* E_Modifier? */ 6170 np1 = node_new_cclass(); 6171 if (IS_NULL(np1)) goto err; 6172 cc = NCCLASS(np1); 6173 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env); 6174 if (r != 0) goto err; 6175 6176 tmp = node_new_quantifier(0, 1, 0); 6177 if (IS_NULL(tmp)) goto err; 6178 NQTFR(tmp)->target = np1; 6179 np1 = tmp; 6180 6181 tmp = node_new_list(np1, list2); 6182 if (IS_NULL(tmp)) goto err; 6183 list2 = tmp; 6184 np1 = NULL; 6185 6186 /* Extend* */ 6187 np1 = node_new_cclass(); 6188 if (IS_NULL(np1)) goto err; 6189 cc = NCCLASS(np1); 6190 r = add_ctype_to_cc(cc, extend, 0, 0, env); 6191 if (r != 0) goto err; 6192 6193 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 6194 if (IS_NULL(tmp)) goto err; 6195 NQTFR(tmp)->target = np1; 6196 np1 = tmp; 6197 6198 tmp = node_new_list(np1, list2); 6199 if (IS_NULL(tmp)) goto err; 6200 list2 = tmp; 6201 np1 = NULL; 6202 6203 /* (E_Base | EBG) */ 6204 np1 = node_new_cclass(); 6205 if (IS_NULL(np1)) goto err; 6206 cc = NCCLASS(np1); 6207 { 6208 static const OnigCodePoint ranges[] = { 6209 8, 6210 0x1F3C2, 0x1F3C2, 6211 0x1F3C7, 0x1F3C7, 6212 0x1F3CC, 0x1F3CC, 6213 0x1F3F3, 0x1F3F3, 6214 0x1F441, 0x1F441, 6215 0x1F46F, 0x1F46F, 6216 0x1F574, 0x1F574, 6217 0x1F6CC, 0x1F6CC, 6218 }; 6219 r = add_ctype_to_cc_by_range(cc, -1, 0, env, sb_out, ranges); 6220 if (r != 0) goto err; 6221 } 6222 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base"), 0, 0, env); 6223 if (r != 0) goto err; 6224 r = add_ctype_to_cc(cc, 
propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env); 6225 if (r != 0) goto err; 6226 6227 tmp = node_new_list(np1, list2); 6228 if (IS_NULL(tmp)) goto err; 6229 list2 = tmp; 6230 np1 = NULL; 6231 6232 tmp = onig_node_new_alt(list2, alt); 6233 if (IS_NULL(tmp)) goto err; 6234 alt = tmp; 6235 list2 = NULL; 6236 6237 /* ZWJ (E_Base_GAZ | Glue_After_Zwj) E_Modifier? */ 6238 /* a sequence starting with ZWJ seems artificial, but GraphemeBreakTest 6239 * has such examples. 6240 * http://www.unicode.org/Public/9.0.0/ucd/auxiliary/GraphemeBreakTest.html 6241 */ 6242 np1 = node_new_cclass(); 6243 if (IS_NULL(np1)) goto err; 6244 cc = NCCLASS(np1); 6245 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Modifier"), 0, 0, env); 6246 if (r != 0) goto err; 6247 6248 tmp = node_new_quantifier(0, 1, 0); 6249 if (IS_NULL(tmp)) goto err; 6250 NQTFR(tmp)->target = np1; 6251 np1 = tmp; 6252 6253 tmp = node_new_list(np1, NULL_NODE); 6254 if (IS_NULL(tmp)) goto err; 6255 list2 = tmp; 6256 np1 = NULL; 6257 6258 np1 = node_new_cclass(); 6259 if (IS_NULL(np1)) goto err; 6260 cc = NCCLASS(np1); 6261 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Glue_After_Zwj"), 0, 0, env); 6262 if (r != 0) goto err; 6263 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=E_Base_GAZ"), 0, 0, env); 6264 if (r != 0) goto err; 6265 6266 tmp = node_new_list(np1, list2); 6267 if (IS_NULL(tmp)) goto err; 6268 list2 = tmp; 6269 np1 = NULL; 6270 6271 r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf); 6272 if (r < 0) goto err; 6273 np1 = node_new_str_raw(buf, buf + r); 6274 if (IS_NULL(np1)) goto err; 6275 6276 tmp = node_new_list(np1, list2); 6277 if (IS_NULL(tmp)) goto err; 6278 list2 = tmp; 6279 np1 = NULL; 6280 6281 tmp = onig_node_new_alt(list2, alt); 6282 if (IS_NULL(tmp)) goto err; 6283 alt = tmp; 6284 list2 = NULL; 6285 6286 /* RI-Sequence := Regional_Indicator{2} */ 6287 np1 = node_new_cclass(); 6288 if (IS_NULL(np1)) goto err; 6289 cc 
= NCCLASS(np1); 6290 r = add_code_range(&(cc->mbuf), env, 0x1F1E6, 0x1F1FF); 6291 if (r != 0) goto err; 6292 6293 tmp = node_new_quantifier(2, 2, 0); 6294 if (IS_NULL(tmp)) goto err; 6295 NQTFR(tmp)->target = np1; 6296 np1 = tmp; 6297 6298 tmp = node_new_list(np1, list2); 6299 if (IS_NULL(tmp)) goto err; 6300 list2 = tmp; 6301 np1 = NULL; 6302 6303 tmp = onig_node_new_alt(list2, alt); 6304 if (IS_NULL(tmp)) goto err; 6305 alt = tmp; 6306 list2 = NULL; 6307 6308 tmp = node_new_list(alt, list); 6309 if (IS_NULL(tmp)) goto err; 6310 list = tmp; 6311 alt = NULL; 6312 6313 /* Prepend* */ 6314 np1 = node_new_cclass(); 6315 if (IS_NULL(np1)) goto err; 6316 cc = NCCLASS(np1); 6317 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Prepend"), 0, 0, env); 6318 if (r != 0) goto err; 6319 6320 tmp = node_new_quantifier(0, REPEAT_INFINITE, 0); 6321 if (IS_NULL(tmp)) goto err; 6322 NQTFR(tmp)->target = np1; 6323 np1 = tmp; 6324 6325 tmp = node_new_list(np1, list); 6326 if (IS_NULL(tmp)) goto err; 6327 list = tmp; 6328 np1 = NULL; 6329 5766 6330 /* PerlSyntax: (?s:.), RubySyntax: (?m:.) 
*/ 5767 OnigOptionType option;5768 6331 np1 = node_new_anychar(); 5769 6332 if (IS_NULL(np1)) goto err; … … 5771 6334 option = env->option; 5772 6335 ONOFF(option, ONIG_OPTION_MULTILINE, 0); 6336 tmp = node_new_option(option); 6337 if (IS_NULL(tmp)) goto err; 6338 NENCLOSE(tmp)->target = np1; 6339 np1 = tmp; 6340 6341 tmp = onig_node_new_alt(np1, NULL_NODE); 6342 if (IS_NULL(tmp)) goto err; 6343 alt = tmp; 6344 np1 = NULL; 6345 6346 /* Prepend+ */ 6347 r = ONIGENC_CODE_TO_MBC(env->enc, 0x200D, buf); 6348 if (r < 0) goto err; 6349 np1 = node_new_str_raw(buf, buf + r); 6350 if (IS_NULL(np1)) goto err; 6351 6352 tmp = node_new_quantifier(0, 1, 0); 6353 if (IS_NULL(tmp)) goto err; 6354 NQTFR(tmp)->target = np1; 6355 np1 = tmp; 6356 6357 tmp = node_new_list(np1, NULL_NODE); 6358 if (IS_NULL(tmp)) goto err; 6359 list2 = tmp; 6360 np1 = NULL; 6361 6362 np1 = node_new_cclass(); 6363 if (IS_NULL(np1)) goto err; 6364 cc = NCCLASS(np1); 6365 r = add_ctype_to_cc(cc, propname2ctype(env, "Grapheme_Cluster_Break=Prepend"), 0, 0, env); 6366 if (r != 0) goto err; 6367 6368 tmp = node_new_quantifier(1, REPEAT_INFINITE, 0); 6369 if (IS_NULL(tmp)) goto err; 6370 NQTFR(tmp)->target = np1; 6371 np1 = tmp; 6372 6373 tmp = node_new_list(np1, list2); 6374 if (IS_NULL(tmp)) goto err; 6375 list2 = tmp; 6376 np1 = NULL; 6377 6378 tmp = onig_node_new_alt(list2, alt); 6379 if (IS_NULL(tmp)) goto err; 6380 alt = tmp; 6381 list2 = NULL; 6382 6383 tmp = onig_node_new_alt(list, alt); 6384 if (IS_NULL(tmp)) goto err; 6385 alt = tmp; 6386 list = NULL; 6387 } 6388 else 6389 #endif /* USE_UNICODE_PROPERTIES */ 6390 { 6391 /* PerlSyntax: (?s:.), RubySyntax: (?m:.) 
*/ 6392 np1 = node_new_anychar(); 6393 if (IS_NULL(np1)) goto err; 6394 6395 option = env->option; 6396 ONOFF(option, ONIG_OPTION_MULTILINE, 0); 6397 tmp = node_new_option(option); 6398 if (IS_NULL(tmp)) goto err; 6399 NENCLOSE(tmp)->target = np1; 6400 np1 = tmp; 6401 6402 alt = onig_node_new_alt(np1, NULL_NODE); 6403 if (IS_NULL(alt)) goto err; 6404 np1 = NULL; 6405 } 6406 6407 /* \x0D\x0A */ 6408 r = ONIGENC_CODE_TO_MBC(env->enc, 0x0D, buf); 6409 if (r < 0) goto err; 6410 num1 = r; 6411 r = ONIGENC_CODE_TO_MBC(env->enc, 0x0A, buf + num1); 6412 if (r < 0) goto err; 6413 np1 = node_new_str_raw(buf, buf + num1 + r); 6414 if (IS_NULL(np1)) goto err; 6415 6416 tmp = onig_node_new_alt(np1, alt); 6417 if (IS_NULL(tmp)) goto err; 6418 alt = tmp; 6419 np1 = NULL; 6420 6421 /* (?>\x0D\x0A|...) */ 6422 tmp = node_new_enclose(ENCLOSE_STOP_BACKTRACK); 6423 if (IS_NULL(tmp)) goto err; 6424 NENCLOSE(tmp)->target = alt; 6425 np1 = tmp; 6426 6427 #ifdef USE_UNICODE_PROPERTIES 6428 if (ONIGENC_IS_UNICODE(env->enc)) { 6429 /* Don't ignore case. */ 6430 option = env->option; 6431 ONOFF(option, ONIG_OPTION_IGNORECASE, 1); 5773 6432 *np = node_new_option(option); 5774 6433 if (IS_NULL(*np)) goto err; 5775 6434 NENCLOSE(*np)->target = np1; 5776 6435 } 6436 else 6437 #endif 6438 { 6439 *np = np1; 6440 } 5777 6441 return ONIG_NORMAL; 5778 6442 5779 6443 err: 5780 6444 onig_node_free(np1); 5781 onig_node_free(np2); 5782 onig_node_free(qn); 5783 onig_node_free(list1); 6445 onig_node_free(list); 5784 6446 onig_node_free(list2); 6447 onig_node_free(alt); 6448 onig_node_free(alt2); 6449 bbuf_free(pbuf1); 5785 6450 return (r == 0) ? 
ONIGERR_MEMORY : r; 5786 6451 } … … 5815 6480 c = data[0]; 5816 6481 if (((c < SINGLE_BYTE_SIZE) && BITSET_AT(cc->bs, c))) { 5817 6482 /* skip if c is included in the bitset */ 5818 6483 c = not_found; 5819 6484 } … … 5829 6494 if (b1 != 0) { 5830 6495 if (((b1 & (b1 - 1)) == 0) && (c == not_found)) { 5831 6496 c = BITS_IN_ROOM * i + countbits(b1 - 1); 5832 6497 } else { 5833 6498 return 0; /* the character class contains multiple chars */ 5834 6499 } 5835 6500 } … … 5876 6541 env->option = NENCLOSE(*np)->option; 5877 6542 r = fetch_token(tok, src, end, env); 5878 if (r < 0) return r; 6543 if (r < 0) { 6544 env->option = prev; 6545 return r; 6546 } 5879 6547 r = parse_subexp(&target, tok, term, src, end, env); 5880 6548 env->option = prev; … … 5949 6617 while (1) { 5950 6618 if (len >= ONIGENC_MBC_MINLEN(env->enc)) { 5951 if (len == enclen(env->enc, NSTR(*np)->s )) {6619 if (len == enclen(env->enc, NSTR(*np)->s, NSTR(*np)->end)) { 5952 6620 r = fetch_token(tok, src, end, env); 5953 6621 NSTRING_CLEAR_RAW(*np); … … 6029 6697 CClassNode* cc; 6030 6698 6031 #ifdef USE_SHARED_CCLASS_TABLE 6032 const OnigCodePoint *mbr; 6033 OnigCodePoint sb_out; 6034 6035 r = ONIGENC_GET_CTYPE_CODE_RANGE(env->enc, tok->u.prop.ctype, 6036 &sb_out, &mbr); 6037 if (r == 0 && 6038 ! 
IS_ASCII_RANGE(env->option) && 6039 ONIGENC_CODE_RANGE_NUM(mbr) 6040 >= THRESHOLD_RANGE_NUM_FOR_SHARE_CCLASS) { 6041 type_cclass_key key; 6042 type_cclass_key* new_key; 6043 6044 key.enc = env->enc; 6045 key.not = tok->u.prop.not; 6046 key.type = tok->u.prop.ctype; 6047 6048 THREAD_ATOMIC_START; 6049 6050 if (IS_NULL(OnigTypeCClassTable)) { 6051 OnigTypeCClassTable 6052 = onig_st_init_table_with_size(&type_type_cclass_hash, 10); 6053 if (IS_NULL(OnigTypeCClassTable)) { 6054 THREAD_ATOMIC_END; 6055 return ONIGERR_MEMORY; 6056 } 6057 } 6058 else { 6059 if (onig_st_lookup(OnigTypeCClassTable, (st_data_t )&key, 6060 (st_data_t* )np)) { 6061 THREAD_ATOMIC_END; 6062 break; 6063 } 6064 } 6065 6066 *np = node_new_cclass_by_codepoint_range(tok->u.prop.not, 6067 sb_out, mbr); 6068 if (IS_NULL(*np)) { 6069 THREAD_ATOMIC_END; 6070 return ONIGERR_MEMORY; 6071 } 6072 6073 cc = NCCLASS(*np); 6074 NCCLASS_SET_SHARE(cc); 6075 new_key = (type_cclass_key* )xmalloc(sizeof(type_cclass_key)); 6076 xmemcpy(new_key, &key, sizeof(type_cclass_key)); 6077 onig_st_add_direct(OnigTypeCClassTable, (st_data_t )new_key, 6078 (st_data_t )*np); 6079 6080 THREAD_ATOMIC_END; 6081 } 6082 else { 6083 #endif 6084 *np = node_new_cclass(); 6085 CHECK_NULL_RETURN_MEMERR(*np); 6086 cc = NCCLASS(*np); 6087 r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 6088 IS_ASCII_RANGE(env->option), env); 6089 if (r != 0) return r; 6090 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); 6091 #ifdef USE_SHARED_CCLASS_TABLE 6092 } 6093 #endif 6699 *np = node_new_cclass(); 6700 CHECK_NULL_RETURN_MEMERR(*np); 6701 cc = NCCLASS(*np); 6702 r = add_ctype_to_cc(cc, tok->u.prop.ctype, 0, 6703 IS_ASCII_RANGE(env->option), env); 6704 if (r != 0) return r; 6705 if (tok->u.prop.not != 0) NCCLASS_SET_NOT(cc); 6094 6706 } 6095 6707 break; … … 6321 6933 6322 6934 *top = NULL; 6935 env->parse_depth++; 6936 if (env->parse_depth > ParseDepthLimit) 6937 return ONIGERR_PARSE_DEPTH_LIMIT_OVER; 6323 6938 r = parse_branch(&node, tok, term, src, end, 
env); 6324 6939 if (r < 0) { … … 6358 6973 } 6359 6974 6975 env->parse_depth--; 6360 6976 return r; 6361 6977 }
Note: See TracChangeset for help on using the changeset viewer.