Changeset 331 for EcnlProtoTool/trunk/onigmo-6.1.3/src/regenc.c
- Timestamp:
- Jan 21, 2018, 12:10:09 AM (6 years ago)
- Location:
- EcnlProtoTool/trunk/onigmo-6.1.3
- Files:
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
EcnlProtoTool/trunk/onigmo-6.1.3/src/regenc.c
r321 r331 4 4 /*- 5 5 * Copyright (c) 2002-2007 K.Kosako <sndgk393 AT ybb DOT ne DOT jp> 6 * Copyright (c) 2011 6 * Copyright (c) 2011-2016 K.Takata <kentkt AT csc DOT jp> 7 7 * All rights reserved. 8 8 * … … 52 52 } 53 53 54 extern int 55 onigenc_mbclen_approximate(const OnigUChar* p,const OnigUChar* e, OnigEncoding enc) 56 { 57 int ret = ONIGENC_PRECISE_MBC_ENC_LEN(enc, p, e); 58 if (ONIGENC_MBCLEN_CHARFOUND_P(ret)) 59 return ONIGENC_MBCLEN_CHARFOUND_LEN(ret); 60 else if (ONIGENC_MBCLEN_NEEDMORE_P(ret)) 61 return (int )(e - p) + ONIGENC_MBCLEN_NEEDMORE_LEN(ret); 62 return 1; 63 } 64 54 65 extern UChar* 55 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s )56 { 57 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s );66 onigenc_get_right_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 67 { 68 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 58 69 if (p < s) { 59 p += enclen(enc, p );70 p += enclen(enc, p, end); 60 71 } 61 72 return p; … … 64 75 extern UChar* 65 76 onigenc_get_right_adjust_char_head_with_prev(OnigEncoding enc, 66 const UChar* start, const UChar* s, const UChar* * prev)67 { 68 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s );77 const UChar* start, const UChar* s, const UChar* end, const UChar** prev) 78 { 79 UChar* p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 69 80 70 81 if (p < s) { 71 82 if (prev) *prev = (const UChar* )p; 72 p += enclen(enc, p );83 p += enclen(enc, p, end); 73 84 } 74 85 else { … … 79 90 80 91 extern UChar* 81 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s )92 onigenc_get_prev_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 82 93 { 83 94 if (s <= start) 84 95 return (UChar* )NULL; 85 96 86 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1 );97 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); 87 98 } 88 99 89 100 extern UChar* 90 
onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, int n)101 onigenc_step_back(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end, int n) 91 102 { 92 103 while (ONIG_IS_NOT_NULL(s) && n-- > 0) { … … 94 105 return (UChar* )NULL; 95 106 96 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1 );107 s = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s - 1, end); 97 108 } 98 109 return (UChar* )s; … … 104 115 UChar* q = (UChar* )p; 105 116 while (n-- > 0) { 106 q += ONIGENC_MBC_ENC_LEN(enc, q );117 q += ONIGENC_MBC_ENC_LEN(enc, q, end); 107 118 } 108 119 return (q <= end ? q : NULL); … … 116 127 117 128 while (q < end) { 118 q += ONIGENC_MBC_ENC_LEN(enc, q );129 q += ONIGENC_MBC_ENC_LEN(enc, q, end); 119 130 n++; 120 131 } … … 127 138 int n = 0; 128 139 UChar* p = (UChar* )s; 140 UChar* e; 129 141 130 142 while (1) { … … 142 154 if (len == 1) return n; 143 155 } 144 p += ONIGENC_MBC_ENC_LEN(enc, p); 156 e = p + ONIGENC_MBC_MAXLEN(enc); 157 p += ONIGENC_MBC_ENC_LEN(enc, p, e); 145 158 n++; 146 159 } … … 152 165 UChar* start = (UChar* )s; 153 166 UChar* p = (UChar* )s; 167 UChar* e; 154 168 155 169 while (1) { … … 167 181 if (len == 1) return (int )(p - start); 168 182 } 169 p += ONIGENC_MBC_ENC_LEN(enc, p); 183 e = p + ONIGENC_MBC_MAXLEN(enc); 184 p += ONIGENC_MBC_ENC_LEN(enc, p, e); 170 185 } 171 186 } … … 350 365 #endif 351 366 367 #if 0 352 368 extern void 353 369 onigenc_set_default_caseconv_table(const UChar* table ARG_UNUSED) … … 356 372 /* obsoleted. 
*/ 357 373 } 374 #endif 358 375 359 376 extern UChar* 360 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s )361 { 362 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s );377 onigenc_get_left_adjust_char_head(OnigEncoding enc, const UChar* start, const UChar* s, const UChar* end) 378 { 379 return ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, start, s, end); 363 380 } 364 381 … … 394 411 extern int 395 412 onigenc_ascii_apply_all_case_fold(OnigCaseFoldType flag ARG_UNUSED, 396 OnigApplyAllCaseFoldFunc f, void* arg) 413 OnigApplyAllCaseFoldFunc f, void* arg, 414 OnigEncoding enc ARG_UNUSED) 397 415 { 398 416 OnigCodePoint code; … … 415 433 onigenc_ascii_get_case_fold_codes_by_str(OnigCaseFoldType flag ARG_UNUSED, 416 434 const OnigUChar* p, const OnigUChar* end ARG_UNUSED, 417 OnigCaseFoldCodeItem items[] )435 OnigCaseFoldCodeItem items[], OnigEncoding enc ARG_UNUSED) 418 436 { 419 437 if (0x41 <= *p && *p <= 0x5a) { … … 451 469 int i, r; 452 470 453 r = onigenc_ascii_apply_all_case_fold(flag, f, arg );471 r = onigenc_ascii_apply_all_case_fold(flag, f, arg, 0); 454 472 if (r != 0) return r; 455 473 … … 555 573 onigenc_not_support_get_ctype_code_range(OnigCtype ctype ARG_UNUSED, 556 574 OnigCodePoint* sb_out ARG_UNUSED, 557 const OnigCodePoint* ranges[] ARG_UNUSED) 575 const OnigCodePoint* ranges[] ARG_UNUSED, 576 OnigEncoding enc) 558 577 { 559 578 return ONIG_NO_SUPPORT_CONFIG; … … 561 580 562 581 extern int 563 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end )582 onigenc_is_mbc_newline_0x0a(const UChar* p, const UChar* end, OnigEncoding enc ARG_UNUSED) 564 583 { 565 584 if (p < end) { … … 572 591 extern int 573 592 onigenc_ascii_mbc_case_fold(OnigCaseFoldType flag ARG_UNUSED, const UChar** p, 574 const UChar*end ARG_UNUSED, UChar* lower)593 const UChar* end, UChar* lower, OnigEncoding enc ARG_UNUSED) 575 594 { 576 595 *lower = ONIGENC_ASCII_CODE_TO_LOWER_CASE(**p); … … 593 612 594 613 extern int 595 
onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED) 614 onigenc_single_byte_mbc_enc_len(const UChar* p ARG_UNUSED, const UChar* e ARG_UNUSED, 615 OnigEncoding enc ARG_UNUSED) 596 616 { 597 617 return 1; … … 599 619 600 620 extern OnigCodePoint 601 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED) 621 onigenc_single_byte_mbc_to_code(const UChar* p, const UChar* end ARG_UNUSED, 622 OnigEncoding enc ARG_UNUSED) 602 623 { 603 624 return (OnigCodePoint )(*p); … … 605 626 606 627 extern int 607 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED) 608 { 609 return (code < 0x100 ? 1 : ONIGERR_INVALID_CODE_POINT_VALUE); 610 } 611 612 extern int 613 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf) 614 { 628 onigenc_single_byte_code_to_mbclen(OnigCodePoint code ARG_UNUSED, OnigEncoding enc ARG_UNUSED) 629 { 630 return 1; 631 } 632 633 extern int 634 onigenc_single_byte_code_to_mbc(OnigCodePoint code, UChar *buf, OnigEncoding enc ARG_UNUSED) 635 { 636 #ifdef RUBY 637 if (code > 0xff) 638 rb_raise(rb_eRangeError, "%u out of char range", code); 639 #endif 615 640 *buf = (UChar )(code & 0xff); 616 641 return 1; … … 619 644 extern UChar* 620 645 onigenc_single_byte_left_adjust_char_head(const UChar* start ARG_UNUSED, 621 const UChar* s) 646 const UChar* s, 647 const UChar* end ARG_UNUSED, 648 OnigEncoding enc ARG_UNUSED) 622 649 { 623 650 return (UChar* )s; … … 626 653 extern int 627 654 onigenc_always_true_is_allowed_reverse_match(const UChar* s ARG_UNUSED, 628 const UChar* end ARG_UNUSED) 655 const UChar* end ARG_UNUSED, 656 OnigEncoding enc ARG_UNUSED) 629 657 { 630 658 return TRUE; … … 633 661 extern int 634 662 onigenc_always_false_is_allowed_reverse_match(const UChar* s ARG_UNUSED, 635 const UChar* end ARG_UNUSED) 663 const UChar* end ARG_UNUSED, 664 OnigEncoding enc ARG_UNUSED) 636 665 { 637 666 return FALSE; … … 639 668 640 669 extern int 641 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype) 
670 onigenc_ascii_is_code_ctype(OnigCodePoint code, unsigned int ctype, 671 OnigEncoding enc ARG_UNUSED) 642 672 { 643 673 if (code < 128) … … 653 683 OnigCodePoint n; 654 684 655 len = enclen(enc, p );685 len = enclen(enc, p, end); 656 686 n = (OnigCodePoint )(*p++); 657 687 if (len == 1) return n; … … 681 711 int i; 682 712 683 len = enclen(enc, p );713 len = enclen(enc, p, end); 684 714 for (i = 0; i < len; i++) { 685 715 *lower++ = *p++; … … 708 738 709 739 extern int 710 onigenc_mb2_code_to_mbclen(OnigCodePoint code) 711 { 712 if ((code & 0xff00) != 0) return 2; 713 else return 1; 714 } 715 716 extern int 717 onigenc_mb4_code_to_mbclen(OnigCodePoint code) 740 onigenc_mb2_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) 741 { 742 if (code <= 0xff) return 1; 743 if (code <= 0xffff) return 2; 744 return ONIGERR_TOO_BIG_WIDE_CHAR_VALUE; 745 } 746 747 extern int 748 onigenc_mb4_code_to_mbclen(OnigCodePoint code, OnigEncoding enc ARG_UNUSED) 718 749 { 719 750 if ((code & 0xff000000) != 0) return 4; … … 734 765 735 766 #if 1 736 if (enclen(enc, buf ) != (p - buf))767 if (enclen(enc, buf, p) != (p - buf)) 737 768 return ONIGERR_INVALID_CODE_POINT_VALUE; 738 769 #endif … … 757 788 758 789 #if 1 759 if (enclen(enc, buf ) != (p - buf))790 if (enclen(enc, buf, p) != (p - buf)) 760 791 return ONIGERR_INVALID_CODE_POINT_VALUE; 761 792 #endif … … 764 795 765 796 extern int 766 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, UChar* p,UChar* end)797 onigenc_minimum_property_name_to_ctype(OnigEncoding enc, const UChar* p, const UChar* end) 767 798 { 768 799 static const PosixBracketEntryType PBS[] = { … … 840 871 841 872 sascii++; 842 p += enclen(enc, p );873 p += enclen(enc, p, end); 843 874 } 844 875 return 0; … … 861 892 862 893 sascii++; 863 p += enclen(enc, p );894 p += enclen(enc, p, end); 864 895 } 865 896 return 0; 866 897 } 867 898 899 #if 0 868 900 /* Property management */ 869 901 static int … … 918 950 return 0; 919 951 } 920 921 extern int 
922 onigenc_property_list_init(int (*f)(void)) 923 { 924 int r; 925 926 THREAD_ATOMIC_START; 927 928 r = f(); 929 930 THREAD_ATOMIC_END; 931 return r; 932 } 952 #endif 953 954 extern int 955 onigenc_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, const OnigUChar* end, 956 OnigUChar* to, OnigUChar* to_end, const struct OnigEncodingTypeST* enc) 957 { 958 OnigCodePoint code; 959 OnigUChar *to_start = to; 960 OnigCaseFoldType flags = *flagP; 961 int codepoint_length; 962 963 while (*pp < end && to < to_end) { 964 codepoint_length = ONIGENC_PRECISE_MBC_ENC_LEN(enc, *pp, end); 965 if (codepoint_length < 0) 966 return codepoint_length; /* encoding invalid */ 967 code = ONIGENC_MBC_TO_CODE(enc, *pp, end); 968 *pp += codepoint_length; 969 970 if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { 971 flags |= ONIGENC_CASE_MODIFIED; 972 code += 'A' - 'a'; 973 } else if (code >= 'A' && code <= 'Z' && 974 (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { 975 flags |= ONIGENC_CASE_MODIFIED; 976 code += 'a' - 'A'; 977 } 978 to += ONIGENC_CODE_TO_MBC(enc, code, to); 979 if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ 980 flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); 981 } 982 *flagP = flags; 983 return (int )(to - to_start); 984 } 985 986 extern int 987 onigenc_single_byte_ascii_only_case_map(OnigCaseFoldType* flagP, const OnigUChar** pp, 988 const OnigUChar* end, OnigUChar* to, OnigUChar* to_end, 989 const struct OnigEncodingTypeST* enc) 990 { 991 OnigCodePoint code; 992 OnigUChar *to_start = to; 993 OnigCaseFoldType flags = *flagP; 994 995 while (*pp < end && to < to_end) { 996 code = *(*pp)++; 997 998 if (code >= 'a' && code <= 'z' && (flags & ONIGENC_CASE_UPCASE)) { 999 flags |= ONIGENC_CASE_MODIFIED; 1000 code += 'A' - 'a'; 1001 } else if (code >= 'A' && code <= 'Z' && 1002 (flags & (ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_FOLD))) { 1003 flags |= 
ONIGENC_CASE_MODIFIED; 1004 code += 'a' - 'A'; 1005 } 1006 *to++ = code; 1007 if (flags & ONIGENC_CASE_TITLECASE) /* switch from titlecase to lowercase for capitalize */ 1008 flags ^= (ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE | ONIGENC_CASE_TITLECASE); 1009 } 1010 *flagP = flags; 1011 return (int )(to - to_start); 1012 }
Note: See TracChangeset for help on using the changeset viewer.