Ignore:
Timestamp:
Jul 9, 2020, 8:51:43 AM (4 years ago)
Author:
coas-nagasima
Message:

mrubyを2.1.1に更新

Location:
EcnlProtoTool/trunk/mruby-2.1.1
Files:
2 added
4 edited
1 moved

Legend:

Unmodified
Added
Removed
  • EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/mrbgem.rake

    r331 r439  
    33  spec.author  = 'mruby developers'
    44  spec.summary = 'String class extension'
    5   spec.add_test_dependency 'mruby-enumerator', core: 'mruby-enumerator'
    65end
  • EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/mrblib/string.rb

    r331 r439  
    11class String
    2 
    3   ##
    4   #  call-seq:
    5   #     String.try_convert(obj) -> string or nil
    6   #
    7   # Try to convert <i>obj</i> into a String, using to_str method.
    8   # Returns converted string or nil if <i>obj</i> cannot be converted
    9   # for any reason.
    10   #
    11   #     String.try_convert("str")     #=> "str"
    12   #     String.try_convert(/re/)      #=> nil
    13   #
    14   def self.try_convert(obj)
    15     if obj.respond_to?(:to_str)
    16       obj.to_str
    17     else
    18       nil
    19     end
    20   end
    212
    223  ##
     
    9677  #
    9778  def lstrip!
    98     raise RuntimeError, "can't modify frozen String" if frozen?
     79    raise FrozenError, "can't modify frozen String" if frozen?
    9980    s = self.lstrip
    10081    (s == self) ? nil : self.replace(s)
     
    11394  #
    11495  def rstrip!
    115     raise RuntimeError, "can't modify frozen String" if frozen?
     96    raise FrozenError, "can't modify frozen String" if frozen?
    11697    s = self.rstrip
    11798    (s == self) ? nil : self.replace(s)
     
    126107  #
    127108  def strip!
    128     raise RuntimeError, "can't modify frozen String" if frozen?
     109    raise FrozenError, "can't modify frozen String" if frozen?
    129110    s = self.strip
    130111    (s == self) ? nil : self.replace(s)
     
    143124  #
    144125  def casecmp(str)
    145     self.downcase <=> str.to_str.downcase
     126    self.downcase <=> str.__to_str.downcase
    146127  rescue NoMethodError
    147     raise TypeError, "no implicit conversion of #{str.class} into String"
     128    nil
     129  end
     130
     131  ##
     132  # call-seq:
     133  #   str.casecmp?(other_str)  -> true, false, or nil
     134  #
     135  # Returns true if str and other_str are equal after case folding,
     136  # false if they are not equal, and nil if other_str is not a string.
     137
     138  def casecmp?(str)
     139    c = self.casecmp(str)
     140    return nil if c.nil?
     141    return c == 0
    148142  end
    149143
     
    187181  #
    188182  def slice!(arg1, arg2=nil)
    189     raise RuntimeError, "can't modify frozen String" if frozen?
     183    raise FrozenError, "can't modify frozen String" if frozen?
    190184    raise "wrong number of arguments (for 1..2)" if arg1.nil? && arg2.nil?
    191185
     
    317311  end
    318312
     313  ##
     314  # Call the given block for each character of
     315  # +self+.
    319316  def each_char(&block)
    320317    return to_enum :each_char unless block
    321 
    322     split('').each do |i|
    323       block.call(i)
     318    pos = 0
     319    while pos < self.size
     320      block.call(self[pos])
     321      pos += 1
    324322    end
    325323    self
     
    353351    self
    354352  end
     353
     354  ##
     355  #  call-seq:
     356  #    string.lines                ->  array of string
     357  #    string.lines {|s| block}    ->  array of string
     358  #
     359  #  Returns strings per line;
     360  #
     361  #    a = "abc\ndef"
     362  #    a.lines    #=> ["abc\n", "def"]
     363  #
     364  #  If a block is given, it works the same as <code>each_line</code>.
     365  def lines(&blk)
     366    lines = self.__lines
     367    if blk
     368      lines.each do |line|
     369        blk.call(line)
     370      end
     371    end
     372    lines
     373  end
     374
     375  ##
     376  #  call-seq:
     377  #     str.upto(other_str, exclusive=false) {|s| block }   -> str
     378  #     str.upto(other_str, exclusive=false)                -> an_enumerator
     379  #
     380  #  Iterates through successive values, starting at <i>str</i> and
     381  #  ending at <i>other_str</i> inclusive, passing each value in turn to
     382  #  the block. The <code>String#succ</code> method is used to generate
     383  #  each value.  If optional second argument exclusive is omitted or is false,
     384  #  the last value will be included; otherwise it will be excluded.
     385  #
     386  #  If no block is given, an enumerator is returned instead.
     387  #
     388  #     "a8".upto("b6") {|s| print s, ' ' }
     389  #     for s in "a8".."b6"
     390  #       print s, ' '
     391  #     end
     392  #
     393  #  <em>produces:</em>
     394  #
     395  #     a8 a9 b0 b1 b2 b3 b4 b5 b6
     396  #     a8 a9 b0 b1 b2 b3 b4 b5 b6
     397  #
     398  #  If <i>str</i> and <i>other_str</i> contain only ASCII numeric characters,
     399  #  both are recognized as decimal numbers. In addition, the width of
     400  #  string (e.g. leading zeros) is handled appropriately.
     401  #
     402  #     "9".upto("11").to_a   #=> ["9", "10", "11"]
     403  #     "25".upto("5").to_a   #=> []
     404  #     "07".upto("11").to_a  #=> ["07", "08", "09", "10", "11"]
     405  def upto(max, exclusive=false, &block)
     406    return to_enum(:upto, max, exclusive) unless block
     407    raise TypeError, "no implicit conversion of #{max.class} into String" unless max.kind_of? String
     408
     409    len = self.length
     410    maxlen = max.length
     411    # single character
     412    if len == 1 and maxlen == 1
     413      c = self.ord
     414      e = max.ord
     415      while c <= e
     416        break if exclusive and c == e
     417        yield c.chr(__ENCODING__)
     418        c += 1
     419      end
     420      return self
     421    end
     422    # both edges are all digits
     423    bi = self.to_i(10)
     424    ei = max.to_i(10)
     425    len = self.length
     426    if (bi > 0 or bi == "0"*len) and (ei > 0 or ei == "0"*maxlen)
     427      while bi <= ei
     428        break if exclusive and bi == ei
     429        s = bi.to_s
     430        s = s.rjust(len, "0") if s.length < len
     431        yield s
     432        bi += 1
     433      end
     434      return self
     435    end
     436    bs = self
     437    while true
     438      n = (bs <=> max)
     439      break if n > 0
     440      break if exclusive and n == 0
     441      yield bs
     442      break if n == 0
     443      bs = bs.succ
     444    end
     445    self
     446  end
    355447end
  • EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/src/string.c

    r331 r439  
    66#include <mruby/range.h>
    77
    8 static mrb_value
    9 mrb_str_getbyte(mrb_state *mrb, mrb_value str)
    10 {
    11   mrb_int pos;
    12   mrb_get_args(mrb, "i", &pos);
    13 
    14   if (pos < 0)
    15     pos += RSTRING_LEN(str);
    16   if (pos < 0 ||  RSTRING_LEN(str) <= pos)
    17     return mrb_nil_value();
    18 
    19   return mrb_fixnum_value((unsigned char)RSTRING_PTR(str)[pos]);
    20 }
    21 
    22 static mrb_value
    23 mrb_str_setbyte(mrb_state *mrb, mrb_value str)
    24 {
    25   mrb_int pos, byte;
    26   long len;
    27 
    28   mrb_get_args(mrb, "ii", &pos, &byte);
    29 
    30   len = RSTRING_LEN(str);
    31   if (pos < -len || len <= pos)
    32     mrb_raisef(mrb, E_INDEX_ERROR, "index %S is out of array", mrb_fixnum_value(pos));
    33   if (pos < 0)
    34     pos += len;
    35 
    36   mrb_str_modify(mrb, mrb_str_ptr(str));
    37   byte &= 0xff;
    38   RSTRING_PTR(str)[pos] = byte;
    39   return mrb_fixnum_value((unsigned char)byte);
    40 }
    41 
    42 static mrb_value
    43 mrb_str_byteslice(mrb_state *mrb, mrb_value str)
    44 {
    45   mrb_value a1;
     8#define ENC_ASCII_8BIT "ASCII-8BIT"
     9#define ENC_BINARY     "BINARY"
     10#define ENC_UTF8       "UTF-8"
     11
     12#define ENC_COMP_P(enc, enc_lit) \
     13  str_casecmp_p(RSTRING_PTR(enc), RSTRING_LEN(enc), enc_lit, sizeof(enc_lit"")-1)
     14
     15#ifdef MRB_WITHOUT_FLOAT
     16# define mrb_float_p(o) FALSE
     17#endif
     18
     19static mrb_bool
     20str_casecmp_p(const char *s1, mrb_int len1, const char *s2, mrb_int len2)
     21{
     22  const char *e1, *e2;
     23
     24  if (len1 != len2) return FALSE;
     25  e1 = s1 + len1;
     26  e2 = s2 + len2;
     27  while (s1 < e1 && s2 < e2) {
     28    if (*s1 != *s2 && TOUPPER(*s1) != TOUPPER(*s2)) return FALSE;
     29    ++s1;
     30    ++s2;
     31  }
     32  return TRUE;
     33}
     34
     35static mrb_value
     36int_chr_binary(mrb_state *mrb, mrb_value num)
     37{
     38  mrb_int cp = mrb_int(mrb, num);
     39  char c;
     40  mrb_value str;
     41
     42  if (cp < 0 || 0xff < cp) {
     43    mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
     44  }
     45  c = (char)cp;
     46  str = mrb_str_new(mrb, &c, 1);
     47  RSTR_SET_ASCII_FLAG(mrb_str_ptr(str));
     48  return str;
     49}
     50
     51#ifdef MRB_UTF8_STRING
     52static mrb_value
     53int_chr_utf8(mrb_state *mrb, mrb_value num)
     54{
     55  mrb_int cp = mrb_int(mrb, num);
     56  char utf8[4];
    4657  mrb_int len;
    47   int argc;
    48 
    49   argc = mrb_get_args(mrb, "o|i", &a1, &len);
    50   if (argc == 2) {
    51     return mrb_str_substr(mrb, str, mrb_fixnum(a1), len);
    52   }
    53   switch (mrb_type(a1)) {
    54   case MRB_TT_RANGE:
    55     {
    56       mrb_int beg;
    57 
    58       len = RSTRING_LEN(str);
    59       switch (mrb_range_beg_len(mrb, a1, &beg, &len, len, TRUE)) {
    60       case 0:                   /* not range */
    61         break;
    62       case 1:                   /* range */
    63         return mrb_str_substr(mrb, str, beg, len);
    64       case 2:                   /* out of range */
    65         mrb_raisef(mrb, E_RANGE_ERROR, "%S out of range", a1);
    66         break;
    67       }
    68       return mrb_nil_value();
    69     }
    70   case MRB_TT_FLOAT:
    71     a1 = mrb_fixnum_value((mrb_int)mrb_float(a1));
    72     /* fall through */
    73   case MRB_TT_FIXNUM:
    74     return mrb_str_substr(mrb, str, mrb_fixnum(a1), 1);
    75   default:
    76     mrb_raise(mrb, E_TYPE_ERROR, "wrong type of argument");
    77   }
    78   /* not reached */
    79   return mrb_nil_value();
    80 }
     58  mrb_value str;
     59  uint32_t ascii_flag = 0;
     60
     61  if (cp < 0 || 0x10FFFF < cp) {
     62    mrb_raisef(mrb, E_RANGE_ERROR, "%v out of char range", num);
     63  }
     64  if (cp < 0x80) {
     65    utf8[0] = (char)cp;
     66    len = 1;
     67    ascii_flag = MRB_STR_ASCII;
     68  }
     69  else if (cp < 0x800) {
     70    utf8[0] = (char)(0xC0 | (cp >> 6));
     71    utf8[1] = (char)(0x80 | (cp & 0x3F));
     72    len = 2;
     73  }
     74  else if (cp < 0x10000) {
     75    utf8[0] = (char)(0xE0 |  (cp >> 12));
     76    utf8[1] = (char)(0x80 | ((cp >>  6) & 0x3F));
     77    utf8[2] = (char)(0x80 | ( cp        & 0x3F));
     78    len = 3;
     79  }
     80  else {
     81    utf8[0] = (char)(0xF0 |  (cp >> 18));
     82    utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
     83    utf8[2] = (char)(0x80 | ((cp >>  6) & 0x3F));
     84    utf8[3] = (char)(0x80 | ( cp        & 0x3F));
     85    len = 4;
     86  }
     87  str = mrb_str_new(mrb, utf8, len);
     88  mrb_str_ptr(str)->flags |= ascii_flag;
     89  return str;
     90}
     91#endif
    8192
    8293/*
     
    135146}
    136147
    137 static mrb_value mrb_fixnum_chr(mrb_state *mrb, mrb_value num);
    138 
    139148/*
    140149 *  call-seq:
     
    146155 *  Append---Concatenates the given object to <i>str</i>. If the object is a
    147156 *  <code>Integer</code>, it is considered as a codepoint, and is converted
    148  *  to a character before concatenation.
     157 *  to a character before concatenation
     158 *  (equivalent to <code>str.concat(integer.chr(__ENCODING__))</code>).
    149159 *
    150160 *     a = "hello "
     
    153163 */
    154164static mrb_value
    155 mrb_str_concat2(mrb_state *mrb, mrb_value self)
     165mrb_str_concat_m(mrb_state *mrb, mrb_value self)
    156166{
    157167  mrb_value str;
    158168
    159169  mrb_get_args(mrb, "o", &str);
    160   if (mrb_fixnum_p(str))
    161     str = mrb_fixnum_chr(mrb, str);
     170  if (mrb_fixnum_p(str) || mrb_float_p(str))
     171#ifdef MRB_UTF8_STRING
     172    str = int_chr_utf8(mrb, str);
     173#else
     174    str = int_chr_binary(mrb, str);
     175#endif
    162176  else
    163     str = mrb_string_type(mrb, str);
    164   mrb_str_concat(mrb, self, str);
     177    mrb_ensure_string_type(mrb, str);
     178  mrb_str_cat_str(mrb, self, str);
    165179  return self;
    166180}
     
    189203    size_t len_l, len_r;
    190204    int ai = mrb_gc_arena_save(mrb);
    191     sub = mrb_string_type(mrb, argv[i]);
     205    sub = mrb_ensure_string_type(mrb, argv[i]);
    192206    mrb_gc_arena_restore(mrb, ai);
    193207    len_l = RSTRING_LEN(self);
     
    218232    size_t len_l, len_r;
    219233    int ai = mrb_gc_arena_save(mrb);
    220     sub = mrb_string_type(mrb, argv[i]);
     234    sub = mrb_ensure_string_type(mrb, argv[i]);
    221235    mrb_gc_arena_restore(mrb, ai);
    222236    len_l = RSTRING_LEN(self);
     
    233247}
    234248
     249enum tr_pattern_type {
     250  TR_UNINITIALIZED = 0,
     251  TR_IN_ORDER  = 1,
     252  TR_RANGE = 2,
     253};
     254
     255/*
     256  #tr Pattern syntax
     257
     258  <syntax> ::= (<pattern>)* | '^' (<pattern>)*
     259  <pattern> ::= <in order> | <range>
     260  <in order> ::= (<ch>)+
     261  <range> ::= <ch> '-' <ch>
     262*/
     263struct tr_pattern {
     264  uint8_t type;         // 1:in-order, 2:range
     265  mrb_bool flag_reverse : 1;
     266  mrb_bool flag_on_heap : 1;
     267  uint16_t n;
     268  union {
     269    uint16_t start_pos;
     270    char ch[2];
     271  } val;
     272  struct tr_pattern *next;
     273};
     274
     275#define STATIC_TR_PATTERN { 0 }
     276
     277static inline void
     278tr_free_pattern(mrb_state *mrb, struct tr_pattern *pat)
     279{
     280  while (pat) {
     281    struct tr_pattern *p = pat->next;
     282    if (pat->flag_on_heap) {
     283      mrb_free(mrb, pat);
     284    }
     285    pat = p;
     286  }
     287}
     288
     289static struct tr_pattern*
     290tr_parse_pattern(mrb_state *mrb, struct tr_pattern *ret, const mrb_value v_pattern, mrb_bool flag_reverse_enable)
     291{
     292  const char *pattern = RSTRING_PTR(v_pattern);
     293  mrb_int pattern_length = RSTRING_LEN(v_pattern);
     294  mrb_bool flag_reverse = FALSE;
     295  struct tr_pattern *pat1;
     296  mrb_int i = 0;
     297
     298  if(flag_reverse_enable && pattern_length >= 2 && pattern[0] == '^') {
     299    flag_reverse = TRUE;
     300    i++;
     301  }
     302
     303  while (i < pattern_length) {
     304    /* is range pattern ? */
     305    mrb_bool const ret_uninit = (ret->type == TR_UNINITIALIZED);
     306    pat1 = ret_uninit
     307           ? ret
     308           : (struct tr_pattern*)mrb_malloc_simple(mrb, sizeof(struct tr_pattern));
     309    if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-') {
     310      if (pat1 == NULL && ret) {
     311      nomem:
     312        tr_free_pattern(mrb, ret);
     313        mrb_exc_raise(mrb, mrb_obj_value(mrb->nomem_err));
     314        return NULL;            /* not reached */
     315      }
     316      pat1->type = TR_RANGE;
     317      pat1->flag_reverse = flag_reverse;
     318      pat1->flag_on_heap = !ret_uninit;
     319      pat1->n = pattern[i+2] - pattern[i] + 1;
     320      pat1->next = NULL;
     321      pat1->val.ch[0] = pattern[i];
     322      pat1->val.ch[1] = pattern[i+2];
     323      i += 3;
     324    }
     325    else {
     326      /* in order pattern. */
     327      mrb_int start_pos = i++;
     328      mrb_int len;
     329
     330      while (i < pattern_length) {
     331        if ((i+2) < pattern_length && pattern[i] != '\\' && pattern[i+1] == '-')
     332          break;
     333        i++;
     334      }
     335
     336      len = i - start_pos;
     337      if (len > UINT16_MAX) {
     338        mrb_raise(mrb, E_ARGUMENT_ERROR, "tr pattern too long (max 65536)");
     339      }
     340      if (pat1 == NULL && ret) {
     341        goto nomem;
     342      }
     343      pat1->type = TR_IN_ORDER;
     344      pat1->flag_reverse = flag_reverse;
     345      pat1->flag_on_heap = !ret_uninit;
     346      pat1->n = len;
     347      pat1->next = NULL;
     348      pat1->val.start_pos = start_pos;
     349    }
     350
     351    if (ret == NULL || ret_uninit) {
     352      ret = pat1;
     353    }
     354    else {
     355      struct tr_pattern *p = ret;
     356      while (p->next != NULL) {
     357        p = p->next;
     358      }
     359      p->next = pat1;
     360    }
     361  }
     362
     363  return ret;
     364}
     365
     366static inline mrb_int
     367tr_find_character(const struct tr_pattern *pat, const char *pat_str, int ch)
     368{
     369  mrb_int ret = -1;
     370  mrb_int n_sum = 0;
     371  mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
     372
     373  while (pat != NULL) {
     374    if (pat->type == TR_IN_ORDER) {
     375      int i;
     376      for (i = 0; i < pat->n; i++) {
     377        if (pat_str[pat->val.start_pos + i] == ch) ret = n_sum + i;
     378      }
     379    }
     380    else if (pat->type == TR_RANGE) {
     381      if (pat->val.ch[0] <= ch && ch <= pat->val.ch[1])
     382        ret = n_sum + ch - pat->val.ch[0];
     383    }
     384    else {
     385      mrb_assert(pat->type == TR_UNINITIALIZED);
     386    }
     387    n_sum += pat->n;
     388    pat = pat->next;
     389  }
     390
     391  if (flag_reverse) {
     392    return (ret < 0) ? MRB_INT_MAX : -1;
     393  }
     394  return ret;
     395}
     396
     397static inline mrb_int
     398tr_get_character(const struct tr_pattern *pat, const char *pat_str, mrb_int n_th)
     399{
     400  mrb_int n_sum = 0;
     401
     402  while (pat != NULL) {
     403    if (n_th < (n_sum + pat->n)) {
     404      mrb_int i = (n_th - n_sum);
     405
     406      switch (pat->type) {
     407      case TR_IN_ORDER:
     408        return pat_str[pat->val.start_pos + i];
     409      case TR_RANGE:
     410        return pat->val.ch[0]+i;
     411      case TR_UNINITIALIZED:
     412        return -1;
     413      }
     414    }
     415    if (pat->next == NULL) {
     416      switch (pat->type) {
     417      case TR_IN_ORDER:
     418        return pat_str[pat->val.start_pos + pat->n - 1];
     419      case TR_RANGE:
     420        return pat->val.ch[1];
     421      case TR_UNINITIALIZED:
     422        return -1;
     423      }
     424    }
     425    n_sum += pat->n;
     426    pat = pat->next;
     427  }
     428
     429  return -1;
     430}
     431
     432static inline void
     433tr_bitmap_set(uint8_t bitmap[32], uint8_t ch)
     434{
     435  uint8_t idx1 = ch / 8;
     436  uint8_t idx2 = ch % 8;
     437  bitmap[idx1] |= (1<<idx2);
     438}
     439
     440static inline mrb_bool
     441tr_bitmap_detect(uint8_t bitmap[32], uint8_t ch)
     442{
     443  uint8_t idx1 = ch / 8;
     444  uint8_t idx2 = ch % 8;
     445  if (bitmap[idx1] & (1<<idx2))
     446    return TRUE;
     447  return FALSE;
     448}
     449
     450/* compile pattern to bitmap */
     451static void
     452tr_compile_pattern(const struct tr_pattern *pat, mrb_value pstr, uint8_t bitmap[32])
     453{
     454  const char *pattern = RSTRING_PTR(pstr);
     455  mrb_int flag_reverse = pat ? pat->flag_reverse : 0;
     456  int i;
     457
     458  for (i=0; i<32; i++) {
     459    bitmap[i] = 0;
     460  }
     461  while (pat != NULL) {
     462    if (pat->type == TR_IN_ORDER) {
     463      for (i = 0; i < pat->n; i++) {
     464        tr_bitmap_set(bitmap, pattern[pat->val.start_pos + i]);
     465      }
     466    }
     467    else if (pat->type == TR_RANGE) {
     468      for (i = pat->val.ch[0]; i < pat->val.ch[1]; i++) {
     469        tr_bitmap_set(bitmap, i);
     470      }
     471    }
     472    else {
     473      mrb_assert(pat->type == TR_UNINITIALIZED);
     474    }
     475    pat = pat->next;
     476  }
     477
     478  if (flag_reverse) {
     479    for (i=0; i<32; i++) {
     480      bitmap[i] ^= 0xff;
     481    }
     482  }
     483}
     484
     485static mrb_bool
     486str_tr(mrb_state *mrb, mrb_value str, mrb_value p1, mrb_value p2, mrb_bool squeeze)
     487{
     488  struct tr_pattern pat = STATIC_TR_PATTERN;
     489  struct tr_pattern rep_storage = STATIC_TR_PATTERN;
     490  char *s;
     491  mrb_int len;
     492  mrb_int i;
     493  mrb_int j;
     494  mrb_bool flag_changed = FALSE;
     495  mrb_int lastch = -1;
     496  struct tr_pattern *rep;
     497
     498  mrb_str_modify(mrb, mrb_str_ptr(str));
     499  tr_parse_pattern(mrb, &pat, p1, TRUE);
     500  rep = tr_parse_pattern(mrb, &rep_storage, p2, FALSE);
     501  s = RSTRING_PTR(str);
     502  len = RSTRING_LEN(str);
     503
     504  for (i=j=0; i<len; i++,j++) {
     505    mrb_int n = tr_find_character(&pat, RSTRING_PTR(p1), s[i]);
     506
     507    if (i>j) s[j] = s[i];
     508    if (n >= 0) {
     509      flag_changed = TRUE;
     510      if (rep == NULL) {
     511        j--;
     512      }
     513      else {
     514        mrb_int c = tr_get_character(rep, RSTRING_PTR(p2), n);
     515
     516        if (c < 0 || (squeeze && c == lastch)) {
     517          j--;
     518          continue;
     519        }
     520        if (c > 0x80) {
     521          mrb_raisef(mrb, E_ARGUMENT_ERROR, "character (%i) out of range", c);
     522        }
     523        lastch = c;
     524        s[i] = (char)c;
     525      }
     526    }
     527  }
     528
     529  tr_free_pattern(mrb, &pat);
     530  tr_free_pattern(mrb, rep);
     531
     532  if (flag_changed) {
     533    RSTR_SET_LEN(RSTRING(str), j);
     534    RSTRING_PTR(str)[j] = 0;
     535  }
     536  return flag_changed;
     537}
     538
     539/*
     540 * call-seq:
     541 *   str.tr(from_str, to_str)   => new_str
     542 *
     543 * Returns a copy of str with the characters in from_str replaced by the
     544 * corresponding characters in to_str.  If to_str is shorter than from_str,
     545 * it is padded with its last character in order to maintain the
     546 * correspondence.
     547 *
     548 *  "hello".tr('el', 'ip')      #=> "hippo"
     549 *  "hello".tr('aeiou', '*')    #=> "h*ll*"
     550 *  "hello".tr('aeiou', 'AA*')  #=> "hAll*"
     551 *
     552 * Both strings may use the c1-c2 notation to denote ranges of characters,
     553 * and from_str may start with a ^, which denotes all characters except
     554 * those listed.
     555 *
     556 *  "hello".tr('a-y', 'b-z')    #=> "ifmmp"
     557 *  "hello".tr('^aeiou', '*')   #=> "*e**o"
     558 *
     559 * The backslash character \ can be used to escape ^ or - and is otherwise
     560 * ignored unless it appears at the end of a range or the end of the
     561 * from_str or to_str:
     562 *
     563 *
     564 *  "hello^world".tr("\\^aeiou", "*") #=> "h*ll**w*rld"
     565 *  "hello-world".tr("a\\-eo", "*")   #=> "h*ll**w*rld"
     566 *
     567 *  "hello\r\nworld".tr("\r", "")   #=> "hello\nworld"
     568 *  "hello\r\nworld".tr("\\r", "")  #=> "hello\r\nwold"
     569 *  "hello\r\nworld".tr("\\\r", "") #=> "hello\nworld"
     570 *
     571 *  "X['\\b']".tr("X\\", "")   #=> "['b']"
     572 *  "X['\\b']".tr("X-\\]", "") #=> "'b'"
     573 *
     574 *  Note: conversion is effective only in ASCII region.
     575 */
     576static mrb_value
     577mrb_str_tr(mrb_state *mrb, mrb_value str)
     578{
     579  mrb_value dup;
     580  mrb_value p1, p2;
     581
     582  mrb_get_args(mrb, "SS", &p1, &p2);
     583  dup = mrb_str_dup(mrb, str);
     584  str_tr(mrb, dup, p1, p2, FALSE);
     585  return dup;
     586}
     587
     588/*
     589 * call-seq:
     590 *   str.tr!(from_str, to_str)   -> str or nil
     591 *
     592 * Translates str in place, using the same rules as String#tr.
     593 * Returns str, or nil if no changes were made.
     594 */
     595static mrb_value
     596mrb_str_tr_bang(mrb_state *mrb, mrb_value str)
     597{
     598  mrb_value p1, p2;
     599
     600  mrb_get_args(mrb, "SS", &p1, &p2);
     601  if (str_tr(mrb, str, p1, p2, FALSE)) {
     602    return str;
     603  }
     604  return mrb_nil_value();
     605}
     606
     607/*
     608 * call-seq:
     609 *   str.tr_s(from_str, to_str)   -> new_str
     610 *
     611 * Processes a copy of str as described under String#tr, then removes
     612 * duplicate characters in regions that were affected by the translation.
     613 *
     614 *  "hello".tr_s('l', 'r')     #=> "hero"
     615 *  "hello".tr_s('el', '*')    #=> "h*o"
     616 *  "hello".tr_s('el', 'hx')   #=> "hhxo"
     617 */
     618static mrb_value
     619mrb_str_tr_s(mrb_state *mrb, mrb_value str)
     620{
     621  mrb_value dup;
     622  mrb_value p1, p2;
     623
     624  mrb_get_args(mrb, "SS", &p1, &p2);
     625  dup = mrb_str_dup(mrb, str);
     626  str_tr(mrb, dup, p1, p2, TRUE);
     627  return dup;
     628}
     629
     630/*
     631 * call-seq:
     632 *   str.tr_s!(from_str, to_str)   -> str or nil
     633 *
     634 * Performs String#tr_s processing on str in place, returning
     635 * str, or nil if no changes were made.
     636 */
     637static mrb_value
     638mrb_str_tr_s_bang(mrb_state *mrb, mrb_value str)
     639{
     640  mrb_value p1, p2;
     641
     642  mrb_get_args(mrb, "SS", &p1, &p2);
     643  if (str_tr(mrb, str, p1, p2, TRUE)) {
     644    return str;
     645  }
     646  return mrb_nil_value();
     647}
     648
     649static mrb_bool
     650str_squeeze(mrb_state *mrb, mrb_value str, mrb_value v_pat)
     651{
     652  struct tr_pattern pat_storage = STATIC_TR_PATTERN;
     653  struct tr_pattern *pat = NULL;
     654  mrb_int i, j;
     655  char *s;
     656  mrb_int len;
     657  mrb_bool flag_changed = FALSE;
     658  mrb_int lastch = -1;
     659  uint8_t bitmap[32];
     660
     661  mrb_str_modify(mrb, mrb_str_ptr(str));
     662  if (!mrb_nil_p(v_pat)) {
     663    pat = tr_parse_pattern(mrb, &pat_storage, v_pat, TRUE);
     664    tr_compile_pattern(pat, v_pat, bitmap);
     665    tr_free_pattern(mrb, pat);
     666  }
     667  s = RSTRING_PTR(str);
     668  len = RSTRING_LEN(str);
     669
     670  if (pat) {
     671    for (i=j=0; i<len; i++,j++) {
     672      if (i>j) s[j] = s[i];
     673      if (tr_bitmap_detect(bitmap, s[i]) && s[i] == lastch) {
     674        flag_changed = TRUE;
     675        j--;
     676      }
     677      lastch = s[i];
     678    }
     679  }
     680  else {
     681    for (i=j=0; i<len; i++,j++) {
     682      if (i>j) s[j] = s[i];
     683      if (s[i] >= 0 && s[i] == lastch) {
     684        flag_changed = TRUE;
     685        j--;
     686      }
     687      lastch = s[i];
     688    }
     689  }
     690
     691  if (flag_changed) {
     692    RSTR_SET_LEN(RSTRING(str), j);
     693    RSTRING_PTR(str)[j] = 0;
     694  }
     695  return flag_changed;
     696}
     697
     698/*
     699 * call-seq:
     700 *   str.squeeze([other_str])    -> new_str
     701 *
     702 * Builds a set of characters from the other_str
     703 * parameter(s) using the procedure described for String#count. Returns a
     704 * new string where runs of the same character that occur in this set are
     705 * replaced by a single character. If no arguments are given, all runs of
     706 * identical characters are replaced by a single character.
     707 *
     708 *  "yellow moon".squeeze                  #=> "yelow mon"
     709 *  "  now   is  the".squeeze(" ")         #=> " now is the"
     710 *  "putters shoot balls".squeeze("m-z")   #=> "puters shot balls"
     711 */
     712static mrb_value
     713mrb_str_squeeze(mrb_state *mrb, mrb_value str)
     714{
     715  mrb_value pat = mrb_nil_value();
     716  mrb_value dup;
     717
     718  mrb_get_args(mrb, "|S", &pat);
     719  dup = mrb_str_dup(mrb, str);
     720  str_squeeze(mrb, dup, pat);
     721  return dup;
     722}
     723
     724/*
     725 * call-seq:
     726 *   str.squeeze!([other_str])   -> str or nil
     727 *
     728 * Squeezes str in place, returning either str, or nil if no
     729 * changes were made.
     730 */
     731static mrb_value
     732mrb_str_squeeze_bang(mrb_state *mrb, mrb_value str)
     733{
     734  mrb_value pat = mrb_nil_value();
     735
     736  mrb_get_args(mrb, "|S", &pat);
     737  if (str_squeeze(mrb, str, pat)) {
     738    return str;
     739  }
     740  return mrb_nil_value();
     741}
     742
     743static mrb_bool
     744str_delete(mrb_state *mrb, mrb_value str, mrb_value v_pat)
     745{
     746  struct tr_pattern pat = STATIC_TR_PATTERN;
     747  mrb_int i, j;
     748  char *s;
     749  mrb_int len;
     750  mrb_bool flag_changed = FALSE;
     751  uint8_t bitmap[32];
     752
     753  mrb_str_modify(mrb, mrb_str_ptr(str));
     754  tr_parse_pattern(mrb, &pat, v_pat, TRUE);
     755  tr_compile_pattern(&pat, v_pat, bitmap);
     756  tr_free_pattern(mrb, &pat);
     757
     758  s = RSTRING_PTR(str);
     759  len = RSTRING_LEN(str);
     760
     761  for (i=j=0; i<len; i++,j++) {
     762    if (i>j) s[j] = s[i];
     763    if (tr_bitmap_detect(bitmap, s[i])) {
     764      flag_changed = TRUE;
     765      j--;
     766    }
     767  }
     768  if (flag_changed) {
     769    RSTR_SET_LEN(RSTRING(str), j);
     770    RSTRING_PTR(str)[j] = 0;
     771  }
     772  return flag_changed;
     773}
     774
     775static mrb_value
     776mrb_str_delete(mrb_state *mrb, mrb_value str)
     777{
     778  mrb_value pat;
     779  mrb_value dup;
     780
     781  mrb_get_args(mrb, "S", &pat);
     782  dup = mrb_str_dup(mrb, str);
     783  str_delete(mrb, dup, pat);
     784  return dup;
     785}
     786
     787static mrb_value
     788mrb_str_delete_bang(mrb_state *mrb, mrb_value str)
     789{
     790  mrb_value pat;
     791
     792  mrb_get_args(mrb, "S", &pat);
     793  if (str_delete(mrb, str, pat)) {
     794    return str;
     795  }
     796  return mrb_nil_value();
     797}
     798
     799/*
     800 * call-seq:
     801 *   str.count(other_str)   -> integer
     802 *
     803 * Each other_str parameter defines a set of characters to count.  The
     804 * intersection of these sets defines the characters to count in str.  Any
     805 * other_str that starts with a caret ^ is negated.  The sequence c1-c2
     806 * means all characters between c1 and c2.  The backslash character \ can
     807 * be used to escape ^ or - and is otherwise ignored unless it appears at
     808 * the end of a sequence or the end of another other_str.
     809 */
     810static mrb_value
     811mrb_str_count(mrb_state *mrb, mrb_value str)
     812{
     813  mrb_value v_pat = mrb_nil_value();
     814  mrb_int i;
     815  char *s;
     816  mrb_int len;
     817  mrb_int count = 0;
     818  struct tr_pattern pat = STATIC_TR_PATTERN;
     819  uint8_t bitmap[32];
     820
     821  mrb_get_args(mrb, "S", &v_pat);
     822  tr_parse_pattern(mrb, &pat, v_pat, TRUE);
     823  tr_compile_pattern(&pat, v_pat, bitmap);
     824  tr_free_pattern(mrb, &pat);
     825
     826  s = RSTRING_PTR(str);
     827  len = RSTRING_LEN(str);
     828  for (i = 0; i < len; i++) {
     829    if (tr_bitmap_detect(bitmap, s[i])) count++;
     830  }
     831  return mrb_fixnum_value(count);
     832}
     833
    235834static mrb_value
    236835mrb_str_hex(mrb_state *mrb, mrb_value self)
     
    260859}
    261860
    262 static mrb_value
    263 mrb_fixnum_chr(mrb_state *mrb, mrb_value num)
    264 {
    265   mrb_int cp = mrb_fixnum(num);
     861/*
     862 *  call-seq:
     863 *     int.chr([encoding])  ->  string
     864 *
     865 *  Returns a string containing the character represented by the +int+'s value
     866 *  according to +encoding+. +"ASCII-8BIT"+ (+"BINARY"+) and +"UTF-8"+ (only
     867 *  with +MRB_UTF8_STRING+) can be specified as +encoding+ (default is
     868 *  +"ASCII-8BIT"+).
     869 *
     870 *     65.chr                  #=> "A"
     871 *     230.chr                 #=> "\xE6"
     872 *     230.chr("ASCII-8BIT")   #=> "\xE6"
     873 *     230.chr("UTF-8")        #=> "\u00E6"
     874 */
     875static mrb_value
     876mrb_int_chr(mrb_state *mrb, mrb_value num)
     877{
     878  mrb_value enc;
     879  mrb_bool enc_given;
     880
     881  mrb_get_args(mrb, "|S?", &enc, &enc_given);
     882  if (!enc_given ||
     883      ENC_COMP_P(enc, ENC_ASCII_8BIT) ||
     884      ENC_COMP_P(enc, ENC_BINARY)) {
     885    return int_chr_binary(mrb, num);
     886  }
    266887#ifdef MRB_UTF8_STRING
    267   char utf8[4];
    268   mrb_int len;
    269 
    270   if (cp < 0 || 0x10FFFF < cp) {
    271     mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
    272   }
    273   if (cp < 0x80) {
    274     utf8[0] = (char)cp;
    275     len = 1;
    276   }
    277   else if (cp < 0x800) {
    278     utf8[0] = (char)(0xC0 | (cp >> 6));
    279     utf8[1] = (char)(0x80 | (cp & 0x3F));
    280     len = 2;
    281   }
    282   else if (cp < 0x10000) {
    283     utf8[0] = (char)(0xE0 |  (cp >> 12));
    284     utf8[1] = (char)(0x80 | ((cp >>  6) & 0x3F));
    285     utf8[2] = (char)(0x80 | ( cp        & 0x3F));
    286     len = 3;
    287   }
     888  else if (ENC_COMP_P(enc, ENC_UTF8)) {
     889    return int_chr_utf8(mrb, num);
     890  }
     891#endif
    288892  else {
    289     utf8[0] = (char)(0xF0 |  (cp >> 18));
    290     utf8[1] = (char)(0x80 | ((cp >> 12) & 0x3F));
    291     utf8[2] = (char)(0x80 | ((cp >>  6) & 0x3F));
    292     utf8[3] = (char)(0x80 | ( cp        & 0x3F));
    293     len = 4;
    294   }
    295   return mrb_str_new(mrb, utf8, len);
    296 #else
    297   char c;
    298 
    299   if (cp < 0 || 0xff < cp) {
    300     mrb_raisef(mrb, E_RANGE_ERROR, "%S out of char range", num);
    301   }
    302   c = (char)cp;
    303   return mrb_str_new(mrb, &c, 1);
    304 #endif
    305 }
    306 
    307 /*
    308  *  call-seq:
    309  *     string.lines    ->  array of string
    310  *
    311  *  Returns strings per line;
    312  *
    313  *     a = "abc\ndef"
    314  *     a.lines    #=> ["abc\n", "def"]
    315  */
    316 static mrb_value
    317 mrb_str_lines(mrb_state *mrb, mrb_value self)
    318 {
    319   mrb_value result;
    320   mrb_value blk;
    321   int ai;
    322   mrb_int len;
    323   mrb_value arg;
    324   char *b = RSTRING_PTR(self);
    325   char *p = b, *t;
    326   char *e = b + RSTRING_LEN(self);
    327 
    328   mrb_get_args(mrb, "&", &blk);
    329 
    330   result = mrb_ary_new(mrb);
    331   ai = mrb_gc_arena_save(mrb);
    332   if (!mrb_nil_p(blk)) {
    333     while (p < e) {
    334       t = p;
    335       while (p < e && *p != '\n') p++;
    336       if (*p == '\n') p++;
    337       len = (mrb_int) (p - t);
    338       arg = mrb_str_new(mrb, t, len);
    339       mrb_yield_argv(mrb, blk, 1, &arg);
    340       mrb_gc_arena_restore(mrb, ai);
    341       if (b != RSTRING_PTR(self)) {
    342         ptrdiff_t diff = p - b;
    343         b = RSTRING_PTR(self);
    344         p = b + diff;
    345       }
    346       e = b + RSTRING_LEN(self);
    347     }
    348     return self;
    349   }
    350   while (p < e) {
    351     t = p;
    352     while (p < e && *p != '\n') p++;
    353     if (*p == '\n') p++;
    354     len = (mrb_int) (p - t);
    355     mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len));
    356     mrb_gc_arena_restore(mrb, ai);
    357   }
    358   return result;
     893    mrb_raisef(mrb, E_ARGUMENT_ERROR, "unknown encoding name - %v", enc);
     894  }
     895  /* not reached */
     896  return mrb_nil_value();
    359897}
    360898
     
    5221060#endif
    5231061
    524 static mrb_bool
    525 all_digits_p(const char *s, mrb_int len)
    526 {
    527   while (len-- > 0) {
    528     if (!ISDIGIT(*s)) return FALSE;
    529     s++;
    530   }
    531   return TRUE;
    532 }
    533 
    5341062/*
    5351063 *  call-seq:
    536  *     str.upto(other_str, exclusive=false) {|s| block }   -> str
    537  *     str.upto(other_str, exclusive=false)                -> an_enumerator
    538  *
    539  *  Iterates through successive values, starting at <i>str</i> and
    540  *  ending at <i>other_str</i> inclusive, passing each value in turn to
    541  *  the block. The <code>String#succ</code> method is used to generate
    542  *  each value.  If optional second argument exclusive is omitted or is false,
    543  *  the last value will be included; otherwise it will be excluded.
    544  *
    545  *  If no block is given, an enumerator is returned instead.
    546  *
    547  *     "a8".upto("b6") {|s| print s, ' ' }
    548  *     for s in "a8".."b6"
    549  *       print s, ' '
    550  *     end
    551  *
    552  *  <em>produces:</em>
    553  *
    554  *     a8 a9 b0 b1 b2 b3 b4 b5 b6
    555  *     a8 a9 b0 b1 b2 b3 b4 b5 b6
    556  *
    557  *  If <i>str</i> and <i>other_str</i> contains only ascii numeric characters,
    558  *  both are recognized as decimal numbers. In addition, the width of
    559  *  string (e.g. leading zeros) is handled appropriately.
    560  *
    561  *     "9".upto("11").to_a   #=> ["9", "10", "11"]
    562  *     "25".upto("5").to_a   #=> []
    563  *     "07".upto("11").to_a  #=> ["07", "08", "09", "10", "11"]
    564  */
    565 static mrb_value
    566 mrb_str_upto(mrb_state *mrb, mrb_value beg)
    567 {
    568   mrb_value end;
    569   mrb_value exclusive = mrb_false_value();
    570   mrb_value block = mrb_nil_value();
    571   mrb_value current, after_end;
    572   mrb_int n;
    573   mrb_bool excl;
    574 
    575   mrb_get_args(mrb, "o|o&", &end, &exclusive, &block);
    576 
    577   if (mrb_nil_p(block)) {
    578     return mrb_funcall(mrb, beg, "to_enum", 3, mrb_symbol_value(mrb_intern_lit(mrb, "upto")), end, exclusive);
    579   }
    580   end = mrb_string_type(mrb, end);
    581   excl = mrb_test(exclusive);
    582 
    583   /* single character */
    584   if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 &&
    585   ISASCII(RSTRING_PTR(beg)[0]) && ISASCII(RSTRING_PTR(end)[0])) {
    586     char c = RSTRING_PTR(beg)[0];
    587     char e = RSTRING_PTR(end)[0];
    588     int ai = mrb_gc_arena_save(mrb);
    589 
    590     if (c > e || (excl && c == e)) return beg;
    591     for (;;) {
    592       mrb_yield(mrb, block, mrb_str_new(mrb, &c, 1));
    593       mrb_gc_arena_restore(mrb, ai);
    594       if (!excl && c == e) break;
    595       c++;
    596       if (excl && c == e) break;
    597     }
    598     return beg;
    599   }
    600   /* both edges are all digits */
    601   if (ISDIGIT(RSTRING_PTR(beg)[0]) && ISDIGIT(RSTRING_PTR(end)[0]) &&
    602       all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
    603       all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
    604     int ai = mrb_gc_arena_save(mrb);
    605     mrb_int min_width = RSTRING_LEN(beg);
    606     mrb_int max_width = RSTRING_LEN(end);
    607     mrb_int bi = mrb_int(mrb, mrb_str_to_inum(mrb, beg, 10, FALSE));
    608     mrb_int ei = mrb_int(mrb, mrb_str_to_inum(mrb, end, 10, FALSE));
    609     mrb_value str = mrb_str_new(mrb, NULL, max_width);
    610     char *buf = RSTRING_PTR(str);
    611 
    612     while (bi <= ei) {
    613       if (excl && bi == ei) break;
    614       snprintf(buf, max_width+1, "%.*" MRB_PRId, (int)min_width, bi);
    615       mrb_yield(mrb, block, mrb_str_new(mrb, buf, strlen(buf)));
    616       mrb_gc_arena_restore(mrb, ai);
    617       bi++;
    618     }
    619 
    620     return beg;
    621   }
    622   /* normal case */
    623   n = mrb_int(mrb, mrb_funcall(mrb, beg, "<=>", 1, end));
    624   if (n > 0 || (excl && n == 0)) return beg;
    625 
    626   after_end = mrb_funcall(mrb, end, "succ", 0);
    627   current = mrb_str_dup(mrb, beg);
    628   while (!mrb_str_equal(mrb, current, after_end)) {
    629     int ai = mrb_gc_arena_save(mrb);
    630     mrb_value next = mrb_nil_value();
    631     if (excl || !mrb_str_equal(mrb, current, end))
    632       next = mrb_funcall(mrb, current, "succ", 0);
    633     mrb_yield(mrb, block, current);
    634     if (mrb_nil_p(next)) break;
    635     current = mrb_str_to_str(mrb, next);
    636     if (excl && mrb_str_equal(mrb, current, end)) break;
    637     if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
    638       break;
     1064 *     str.delete_prefix!(prefix) -> self or nil
     1065 *
     1066 *  Deletes leading <code>prefix</code> from <i>str</i>, returning
     1067 *  <code>nil</code> if no change was made.
     1068 *
     1069 *     "hello".delete_prefix!("hel") #=> "lo"
     1070 *     "hello".delete_prefix!("llo") #=> nil
     1071 */
     1072static mrb_value
     1073mrb_str_del_prefix_bang(mrb_state *mrb, mrb_value self)
     1074{
     1075  mrb_int plen, slen;
     1076  char *ptr, *s;
     1077  struct RString *str = RSTRING(self);
     1078
     1079  mrb_get_args(mrb, "s", &ptr, &plen);
     1080  slen = RSTR_LEN(str);
     1081  if (plen > slen) return mrb_nil_value();
     1082  s = RSTR_PTR(str);
     1083  if (memcmp(s, ptr, plen) != 0) return mrb_nil_value();
     1084  if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
     1085    str->as.heap.ptr += plen;
     1086  }
     1087  else {
     1088    mrb_str_modify(mrb, str);
     1089    s = RSTR_PTR(str);
     1090    memmove(s, s+plen, slen-plen);
     1091  }
     1092  RSTR_SET_LEN(str, slen-plen);
     1093  return self;
     1094}
     1095
     1096/*
     1097 *  call-seq:
     1098 *     str.delete_prefix(prefix) -> new_str
     1099 *
     1100 *  Returns a copy of <i>str</i> with leading <code>prefix</code> deleted.
     1101 *
     1102 *     "hello".delete_prefix("hel") #=> "lo"
     1103 *     "hello".delete_prefix("llo") #=> "hello"
     1104 */
     1105static mrb_value
     1106mrb_str_del_prefix(mrb_state *mrb, mrb_value self)
     1107{
     1108  mrb_int plen, slen;
     1109  char *ptr;
     1110
     1111  mrb_get_args(mrb, "s", &ptr, &plen);
     1112  slen = RSTRING_LEN(self);
     1113  if (plen > slen) return mrb_str_dup(mrb, self);
     1114  if (memcmp(RSTRING_PTR(self), ptr, plen) != 0)
     1115    return mrb_str_dup(mrb, self);
     1116  return mrb_str_substr(mrb, self, plen, slen-plen);
     1117}
     1118
     1119/*
     1120 *  call-seq:
     1121 *     str.delete_suffix!(suffix) -> self or nil
     1122 *
     1123 *  Deletes trailing <code>suffix</code> from <i>str</i>, returning
     1124 *  <code>nil</code> if no change was made.
     1125 *
     1126 *     "hello".delete_suffix!("llo") #=> "he"
     1127 *     "hello".delete_suffix!("hel") #=> nil
     1128 */
     1129static mrb_value
     1130mrb_str_del_suffix_bang(mrb_state *mrb, mrb_value self)
     1131{
     1132  mrb_int plen, slen;
     1133  char *ptr, *s;
     1134  struct RString *str = RSTRING(self);
     1135
     1136  mrb_get_args(mrb, "s", &ptr, &plen);
     1137  slen = RSTR_LEN(str);
     1138  if (plen > slen) return mrb_nil_value();
     1139  s = RSTR_PTR(str);
     1140  if (memcmp(s+slen-plen, ptr, plen) != 0) return mrb_nil_value();
     1141  if (!mrb_frozen_p(str) && (RSTR_SHARED_P(str) || RSTR_FSHARED_P(str))) {
     1142    /* no need to modify string */
     1143  }
     1144  else {
     1145    mrb_str_modify(mrb, str);
     1146  }
     1147  RSTR_SET_LEN(str, slen-plen);
     1148  return self;
     1149}
     1150
     1151/*
     1152 *  call-seq:
     1153 *     str.delete_suffix(suffix) -> new_str
     1154 *
     1155 *  Returns a copy of <i>str</i> with leading <code>suffix</code> deleted.
     1156 *
     1157 *     "hello".delete_suffix("hel") #=> "lo"
     1158 *     "hello".delete_suffix("llo") #=> "hello"
     1159 */
     1160static mrb_value
     1161mrb_str_del_suffix(mrb_state *mrb, mrb_value self)
     1162{
     1163  mrb_int plen, slen;
     1164  char *ptr;
     1165
     1166  mrb_get_args(mrb, "s", &ptr, &plen);
     1167  slen = RSTRING_LEN(self);
     1168  if (plen > slen) return mrb_str_dup(mrb, self);
     1169  if (memcmp(RSTRING_PTR(self)+slen-plen, ptr, plen) != 0)
     1170    return mrb_str_dup(mrb, self);
     1171  return mrb_str_substr(mrb, self, 0, slen-plen);
     1172}
     1173
     1174static mrb_value
     1175mrb_str_lines(mrb_state *mrb, mrb_value self)
     1176{
     1177  mrb_value result;
     1178  int ai;
     1179  mrb_int len;
     1180  char *b = RSTRING_PTR(self);
     1181  char *p = b, *t;
     1182  char *e = b + RSTRING_LEN(self);
     1183
     1184  result = mrb_ary_new(mrb);
     1185  ai = mrb_gc_arena_save(mrb);
     1186  while (p < e) {
     1187    t = p;
     1188    while (p < e && *p != '\n') p++;
     1189    if (*p == '\n') p++;
     1190    len = (mrb_int) (p - t);
     1191    mrb_ary_push(mrb, result, mrb_str_new(mrb, t, len));
    6391192    mrb_gc_arena_restore(mrb, ai);
    6401193  }
    641 
    642   return beg;
     1194  return result;
    6431195}
    6441196
     
    6491201
    6501202  mrb_define_method(mrb, s, "dump",            mrb_str_dump,            MRB_ARGS_NONE());
    651   mrb_define_method(mrb, s, "getbyte",         mrb_str_getbyte,         MRB_ARGS_REQ(1));
    652   mrb_define_method(mrb, s, "setbyte",         mrb_str_setbyte,         MRB_ARGS_REQ(2));
    653   mrb_define_method(mrb, s, "byteslice",       mrb_str_byteslice,       MRB_ARGS_REQ(1)|MRB_ARGS_OPT(1));
    6541203  mrb_define_method(mrb, s, "swapcase!",       mrb_str_swapcase_bang,   MRB_ARGS_NONE());
    6551204  mrb_define_method(mrb, s, "swapcase",        mrb_str_swapcase,        MRB_ARGS_NONE());
    656   mrb_define_method(mrb, s, "concat",          mrb_str_concat2,         MRB_ARGS_REQ(1));
    657   mrb_define_method(mrb, s, "<<",              mrb_str_concat2,         MRB_ARGS_REQ(1));
     1205  mrb_define_method(mrb, s, "concat",          mrb_str_concat_m,        MRB_ARGS_REQ(1));
     1206  mrb_define_method(mrb, s, "<<",              mrb_str_concat_m,        MRB_ARGS_REQ(1));
     1207  mrb_define_method(mrb, s, "count",           mrb_str_count,           MRB_ARGS_REQ(1));
     1208  mrb_define_method(mrb, s, "tr",              mrb_str_tr,              MRB_ARGS_REQ(2));
     1209  mrb_define_method(mrb, s, "tr!",             mrb_str_tr_bang,         MRB_ARGS_REQ(2));
     1210  mrb_define_method(mrb, s, "tr_s",            mrb_str_tr_s,            MRB_ARGS_REQ(2));
     1211  mrb_define_method(mrb, s, "tr_s!",           mrb_str_tr_s_bang,       MRB_ARGS_REQ(2));
     1212  mrb_define_method(mrb, s, "squeeze",         mrb_str_squeeze,         MRB_ARGS_OPT(1));
     1213  mrb_define_method(mrb, s, "squeeze!",        mrb_str_squeeze_bang,    MRB_ARGS_OPT(1));
     1214  mrb_define_method(mrb, s, "delete",          mrb_str_delete,          MRB_ARGS_REQ(1));
     1215  mrb_define_method(mrb, s, "delete!",         mrb_str_delete_bang,     MRB_ARGS_REQ(1));
    6581216  mrb_define_method(mrb, s, "start_with?",     mrb_str_start_with,      MRB_ARGS_REST());
    6591217  mrb_define_method(mrb, s, "end_with?",       mrb_str_end_with,        MRB_ARGS_REST());
     
    6611219  mrb_define_method(mrb, s, "oct",             mrb_str_oct,             MRB_ARGS_NONE());
    6621220  mrb_define_method(mrb, s, "chr",             mrb_str_chr,             MRB_ARGS_NONE());
    663   mrb_define_method(mrb, s, "lines",           mrb_str_lines,           MRB_ARGS_NONE());
    6641221  mrb_define_method(mrb, s, "succ",            mrb_str_succ,            MRB_ARGS_NONE());
    6651222  mrb_define_method(mrb, s, "succ!",           mrb_str_succ_bang,       MRB_ARGS_NONE());
    666   mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next"), mrb_intern_lit(mrb, "succ"));
    667   mrb_alias_method(mrb, s, mrb_intern_lit(mrb, "next!"), mrb_intern_lit(mrb, "succ!"));
    668   mrb_define_method(mrb, s, "ord", mrb_str_ord, MRB_ARGS_NONE());
    669   mrb_define_method(mrb, s, "upto", mrb_str_upto, MRB_ARGS_ANY());
    670 
    671   mrb_define_method(mrb, mrb->fixnum_class, "chr", mrb_fixnum_chr, MRB_ARGS_NONE());
     1223  mrb_define_method(mrb, s, "next",            mrb_str_succ,            MRB_ARGS_NONE());
     1224  mrb_define_method(mrb, s, "next!",           mrb_str_succ_bang,       MRB_ARGS_NONE());
     1225  mrb_define_method(mrb, s, "ord",             mrb_str_ord,             MRB_ARGS_NONE());
     1226  mrb_define_method(mrb, s, "delete_prefix!",  mrb_str_del_prefix_bang, MRB_ARGS_REQ(1));
     1227  mrb_define_method(mrb, s, "delete_prefix",   mrb_str_del_prefix,      MRB_ARGS_REQ(1));
     1228  mrb_define_method(mrb, s, "delete_suffix!",  mrb_str_del_suffix_bang, MRB_ARGS_REQ(1));
     1229  mrb_define_method(mrb, s, "delete_suffix",   mrb_str_del_suffix,      MRB_ARGS_REQ(1));
     1230
     1231  mrb_define_method(mrb, s, "__lines",         mrb_str_lines,           MRB_ARGS_NONE());
     1232
     1233  mrb_define_method(mrb, mrb_module_get(mrb, "Integral"), "chr", mrb_int_chr, MRB_ARGS_OPT(1));
    6721234}
    6731235
  • EcnlProtoTool/trunk/mruby-2.1.1/mrbgems/mruby-string-ext/test/string.rb

    r331 r439  
     1# coding: utf-8
    12##
    23# String(Ext) Test
    34
    4 UTF8STRING = ("\343\201\202".size == 1)
    5 
    6 assert('String.try_convert') do
    7   assert_nil String.try_convert(nil)
    8   assert_nil String.try_convert(:foo)
    9   assert_equal "", String.try_convert("")
    10   assert_equal "1,2,3", String.try_convert("1,2,3")
    11 end
    12 
    13 assert('String#getbyte') do
    14   str1 = "hello"
    15   bytes1 = [104, 101, 108, 108, 111]
    16   assert_equal bytes1[0], str1.getbyte(0)
    17   assert_equal bytes1[-1], str1.getbyte(-1)
    18   assert_equal bytes1[6], str1.getbyte(6)
    19 
    20   str2 = "\xFF"
    21   bytes2 = [0xFF]
    22   assert_equal bytes2[0], str2.getbyte(0)
    23 end
    24 
    25 assert('String#setbyte') do
    26   str1 = "hello"
    27   h = "H".getbyte(0)
    28   str1.setbyte(0, h)
    29   assert_equal(h, str1.getbyte(0))
    30   assert_equal("Hello", str1)
    31 end
    32 
    33 assert("String#setbyte raises IndexError if arg conversion resizes String") do
    34   $s = "01234\n"
    35   class Tmp
    36       def to_i
    37           $s.chomp! ''
    38           95
    39       end
    40   end
    41   tmp = Tmp.new
    42   assert_raise(IndexError) { $s.setbyte(5, tmp) }
    43 end
    44 
    45 assert('String#byteslice') do
    46   str1 = "hello"
    47   assert_equal("e", str1.byteslice(1))
    48   assert_equal("o", str1.byteslice(-1))
    49   assert_equal("ell", str1.byteslice(1..3))
    50   assert_equal("el", str1.byteslice(1...3))
     5UTF8STRING = __ENCODING__ == "UTF-8"
     6
     7def assert_upto(exp, receiver, *args)
     8  act = []
     9  receiver.upto(*args) { |v| act << v }
     10  assert_equal exp, act
    5111end
    5212
    5313assert('String#dump') do
    54   ("\1" * 100).dump     # should not raise an exception - regress #1210
    55   "\0".inspect == "\"\\000\"" and
    56   "foo".dump == "\"foo\""
     14  assert_equal("\"\\x00\"", "\0".dump)
     15  assert_equal("\"foo\"", "foo".dump)
     16  assert_equal('"\xe3\x82\x8b"', "る".dump)
     17  assert_nothing_raised { ("\1" * 100).dump }   # regress #1210
    5718end
    5819
    5920assert('String#strip') do
    6021  s = "  abc  "
    61   "".strip == "" and " \t\r\n\f\v".strip == "" and
    62   "\0a\0".strip == "\0a" and
    63   "abc".strip     == "abc" and
    64   "  abc".strip   == "abc" and
    65   "abc  ".strip   == "abc" and
    66   "  abc  ".strip == "abc" and
    67   s == "  abc  "
     22  assert_equal("abc", s.strip)
     23  assert_equal("  abc  ", s)
     24  assert_equal("", "".strip)
     25  assert_equal("", " \t\r\n\f\v".strip)
     26  assert_equal("\0a", "\0a\0".strip)
     27  assert_equal("abc", "abc".strip)
     28  assert_equal("abc", "  abc".strip)
     29  assert_equal("abc", "abc  ".strip)
    6830end
    6931
    7032assert('String#lstrip') do
    7133  s = "  abc  "
    72   s.lstrip
    73   "".lstrip == "" and " \t\r\n\f\v".lstrip == "" and
    74   "\0a\0".lstrip == "\0a\0" and
    75   "abc".lstrip     == "abc"   and
    76   "  abc".lstrip   == "abc"   and
    77   "abc  ".lstrip   == "abc  " and
    78   "  abc  ".lstrip == "abc  " and
    79   s == "  abc  "
     34  assert_equal("abc  ", s.lstrip)
     35  assert_equal("  abc  ", s)
     36  assert_equal("", "".lstrip)
     37  assert_equal("", " \t\r\n\f\v".lstrip)
     38  assert_equal("\0a\0", "\0a\0".lstrip)
     39  assert_equal("abc", "abc".lstrip)
     40  assert_equal("abc", "  abc".lstrip)
     41  assert_equal("abc  ", "abc  ".lstrip)
    8042end
    8143
    8244assert('String#rstrip') do
    8345  s = "  abc  "
    84   s.rstrip
    85   "".rstrip == "" and " \t\r\n\f\v".rstrip == "" and
    86   "\0a\0".rstrip == "\0a" and
    87   "abc".rstrip     == "abc"   and
    88   "  abc".rstrip   == "  abc" and
    89   "abc  ".rstrip   == "abc"   and
    90   "  abc  ".rstrip == "  abc" and
    91   s == "  abc  "
     46  assert_equal("  abc", s.rstrip)
     47  assert_equal("  abc  ", s)
     48  assert_equal("", "".rstrip)
     49  assert_equal("", " \t\r\n\f\v".rstrip)
     50  assert_equal("\0a", "\0a\0".rstrip)
     51  assert_equal("abc", "abc".rstrip)
     52  assert_equal("  abc", "  abc".rstrip)
     53  assert_equal("abc", "abc  ".rstrip)
    9254end
    9355
     
    9557  s = "  abc  "
    9658  t = "abc"
    97   s.strip! == "abc" and s == "abc" and t.strip! == nil
     59  assert_equal("abc", s.strip!)
     60  assert_equal("abc", s)
     61  assert_nil(t.strip!)
     62  assert_equal("abc", t)
    9863end
    9964
     
    10166  s = "  abc  "
    10267  t = "abc  "
    103   s.lstrip! == "abc  " and s == "abc  " and t.lstrip! == nil
     68  assert_equal("abc  ", s.lstrip!)
     69  assert_equal("abc  ", s)
     70  assert_nil(t.lstrip!)
     71  assert_equal("abc  ", t)
    10472end
    10573
     
    10775  s = "  abc  "
    10876  t = "  abc"
    109   s.rstrip! == "  abc" and s == "  abc" and t.rstrip! == nil
     77  assert_equal("  abc", s.rstrip!)
     78  assert_equal("  abc", s)
     79  assert_nil(t.rstrip!)
     80  assert_equal("  abc", t)
    11081end
    11182
     
    12596  assert_equal "Hello World!", "Hello " << "World" << 33
    12697  assert_equal "Hello World!", "Hello ".concat("World").concat(33)
    127 
    128   o = Object.new
    129   def o.to_str
    130     "to_str"
    131   end
    132   assert_equal "hi to_str", "hi " << o
    133 
    13498  assert_raise(TypeError) { "".concat(Object.new) }
     99
     100  if UTF8STRING
     101    assert_equal "H«", "H" << 0xab
     102    assert_equal "Hは", "H" << 12399
     103  else
     104    assert_equal "H\xab", "H" << 0xab
     105    assert_raise(RangeError) { "H" << 12399 }
     106  end
    135107end
    136108
     
    140112  assert_equal(-1, "abcdef".casecmp("abcdefg"))
    141113  assert_equal 0, "abcdef".casecmp("ABCDEF")
    142   o = Object.new
    143   def o.to_str
    144     "ABCDEF"
    145   end
    146   assert_equal 0, "abcdef".casecmp(o)
     114end
     115
     116assert('String#count') do
     117  s = "abccdeff123"
     118  assert_equal 0, s.count("")
     119  assert_equal 1, s.count("a")
     120  assert_equal 2, s.count("ab")
     121  assert_equal 9, s.count("^c")
     122  assert_equal 8, s.count("a-z")
     123  assert_equal 4, s.count("a0-9")
     124end
     125
     126assert('String#tr') do
     127  assert_equal "ABC", "abc".tr('a-z', 'A-Z')
     128  assert_equal "hippo", "hello".tr('el', 'ip')
     129  assert_equal "Ruby", "Lisp".tr("Lisp", "Ruby")
     130  assert_equal "*e**o", "hello".tr('^aeiou', '*')
     131  assert_equal "heo", "hello".tr('l', '')
     132end
     133
     134assert('String#tr!') do
     135  s = "abcdefghijklmnopqR"
     136  assert_equal "ab12222hijklmnopqR", s.tr!("cdefg", "12")
     137  assert_equal "ab12222hijklmnopqR", s
     138end
     139
     140assert('String#tr_s') do
     141  assert_equal "hero", "hello".tr_s('l', 'r')
     142  assert_equal "h*o", "hello".tr_s('el', '*')
     143  assert_equal "hhxo", "hello".tr_s('el', 'hx')
     144end
     145
     146assert('String#tr_s!') do
     147  s = "hello"
     148  assert_equal "hero", s.tr_s!('l', 'r')
     149  assert_equal "hero", s
     150  assert_nil s.tr_s!('l', 'r')
     151end
     152
     153assert('String#squeeze') do
     154  assert_equal "yelow mon", "yellow moon".squeeze
     155  assert_equal " now is the", "  now   is  the".squeeze(" ")
     156  assert_equal "puters shot balls", "putters shoot balls".squeeze("m-z")
     157end
     158
     159assert('String#squeeze!') do
     160  s = "  now   is  the"
     161  assert_equal " now is the", s.squeeze!(" ")
     162  assert_equal " now is the", s
     163end
     164
     165assert('String#delete') do
     166  assert_equal "he", "hello".delete("lo")
     167  assert_equal "hll", "hello".delete("aeiou")
     168  assert_equal "ll", "hello".delete("^l")
     169  assert_equal "ho", "hello".delete("ej-m")
     170end
     171
     172assert('String#delete!') do
     173  s = "hello"
     174  assert_equal "he", s.delete!("lo")
     175  assert_equal "he", s
     176  assert_nil s.delete!("lz")
    147177end
    148178
     
    202232  assert_equal 8, "010".oct
    203233  assert_equal (-8), "-10".oct
    204 end
    205 
    206 assert('String#chr') do
    207   assert_equal "a", "abcde".chr
    208   # test Fixnum#chr as well
    209   assert_equal "a", 97.chr
    210234end
    211235
     
    496520
    497521assert('String#upto') do
    498   assert_equal %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8".upto("b6").to_a
    499   assert_equal ["9", "10", "11"], "9".upto("11").to_a
    500   assert_equal [], "25".upto("5").to_a
    501   assert_equal ["07", "08", "09", "10", "11"], "07".upto("11").to_a
    502 
    503 if UTF8STRING
    504   assert_equal ["あ", "ぃ", "い", "ぅ", "う", "ぇ", "え", "ぉ", "お"], "あ".upto("お").to_a
    505 end
    506 
    507   assert_equal ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9".upto("A").to_a
     522  assert_upto %w(a8 a9 b0 b1 b2 b3 b4 b5 b6), "a8", "b6"
     523  assert_upto ["9", "10", "11"], "9", "11"
     524  assert_upto [], "25", "5"
     525  assert_upto ["07", "08", "09", "10", "11"], "07", "11"
     526  assert_upto ["9", ":", ";", "<", "=", ">", "?", "@", "A"], "9", "A"
     527
     528  if UTF8STRING
     529    assert_upto %w(あ ぃ い ぅ う ぇ え ぉ お), "あ", "お"
     530  end
    508531
    509532  a     = "aa"
     
    587610
    588611assert('String#chr') do
     612  assert_equal "a", "abcde".chr
    589613  assert_equal "h", "hello!".chr
    590 end
     614  assert_equal "", "".chr
     615end
     616
    591617assert('String#chr(UTF-8)') do
    592618  assert_equal "こ", "こんにちは世界!".chr
     
    614640
    615641assert('String#each_char') do
    616   s = ""
     642  chars = []
    617643  "hello!".each_char do |x|
    618     s += x
    619   end
    620   assert_equal "hello!", s
     644    chars << x
     645  end
     646  assert_equal ["h", "e", "l", "l", "o", "!"], chars
    621647end
    622648
    623649assert('String#each_char(UTF-8)') do
    624   s = ""
     650  chars = []
    625651  "こんにちは世界!".each_char do |x|
    626     s += x
    627   end
    628   assert_equal "こんにちは世界!", s
     652    chars << x
     653  end
     654  assert_equal ["こ", "ん", "に", "ち", "は", "世", "界", "!"], chars
    629655end if UTF8STRING
    630656
     
    666692  assert_equal expect, cp
    667693end if UTF8STRING
     694
     695assert('String#delete_prefix') do
     696  assert_equal "llo", "hello".delete_prefix("he")
     697  assert_equal "hello", "hello".delete_prefix("llo")
     698  assert_equal "llo", "hello".delete_prefix!("he")
     699  assert_nil "hello".delete_prefix!("llo")
     700end
     701
     702assert('String#delete_suffix') do
     703  assert_equal "he", "hello".delete_suffix("llo")
     704  assert_equal "hello", "hello".delete_suffix("he")
     705  assert_equal "he", "hello".delete_suffix!("llo")
     706  assert_nil "hello".delete_suffix!("he")
     707end
Note: See TracChangeset for help on using the changeset viewer.