Changeset 331 for EcnlProtoTool/trunk/mrbgems/mruby-pack
- Timestamp:
- Jan 21, 2018, 12:10:09 AM (6 years ago)
- Location:
- EcnlProtoTool/trunk/mrbgems/mruby-pack
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
EcnlProtoTool/trunk/mrbgems/mruby-pack/README.md
r279 r331
  39   39  - S : 16-bit unsigned, native endian (`uint16_t`)
  40   40  - s : 16-bit signed, native endian (`int16_t`)
       41  - U : UTF-8 character
  41   42  - V : 32-bit unsigned, VAX (little-endian) byte order
  42   43  - v : 16-bit unsigned, VAX (little-endian) byte order
       44  - x : null byte
  43   45  - Z : same as "a", except that null is added with *
       46
  44   47
  45   48
-
EcnlProtoTool/trunk/mrbgems/mruby-pack/src/pack.c
r321 r331
  78   78  hex2int(unsigned char ch)
  79   79  {
  80       if (ch >= '0' && ch <= '9')
       80  if (ch >= '0' && ch <= '9')
  81   81  return ch - '0';
  82   82  else if (ch >= 'A' && ch <= 'F')
   …    …
 415  415  char utf8[4];
 416  416  int len;
 417
 418       unsigned long c = mrb_fixnum(o);
      417  unsigned long c = 0;
      418
      419  if (mrb_float_p(o)) {
      420  goto range_error;
      421  }
      422  c = mrb_fixnum(o);
 419  423
 420  424  /* Unicode character */
   …    …
 435  439  len = 3;
 436  440  }
 437       else {
      441  else if (c < 0x200000) {
 438  442  utf8[0] = (char)(0xF0 | (c >> 18) );
 439  443  utf8[1] = (char)(0x80 | ((c >> 12) & 0x3F));
   …    …
 442  446  len = 4;
 443  447  }
 444
      448  else {
      449  range_error:
      450  mrb_raise(mrb, E_RANGE_ERROR, "pack(U): value out of range");
      451  }
      452
 445  453  str = str_len_ensure(mrb, str, sidx + len);
 446  454  memcpy(RSTRING_PTR(str) + sidx, utf8, len);
 447  455
 448  456  return len;
      457  }
      458
      459  static const unsigned long utf8_limits[] = {
      460  0x0, /* 1 */
      461  0x80, /* 2 */
      462  0x800, /* 3 */
      463  0x10000, /* 4 */
      464  0x200000, /* 5 */
      465  0x4000000, /* 6 */
      466  0x80000000, /* 7 */
      467  };
      468
      469  static unsigned long
      470  utf8_to_uv(mrb_state *mrb, const char *p, long *lenp)
      471  {
      472  int c = *p++ & 0xff;
      473  unsigned long uv = c;
      474  long n;
      475
      476  if (!(uv & 0x80)) {
      477  *lenp = 1;
      478  return uv;
      479  }
      480  if (!(uv & 0x40)) {
      481  *lenp = 1;
      482  mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character");
      483  }
      484
      485  if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
      486  else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
      487  else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
      488  else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
      489  else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
      490  else {
      491  *lenp = 1;
      492  mrb_raise(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character");
      493  }
      494  if (n > *lenp) {
      495  mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character (expected %S bytes, given %S bytes)",
      496  mrb_fixnum_value(n), mrb_fixnum_value(*lenp));
      497  }
      498  *lenp = n--;
      499  if (n != 0) {
      500  while (n--) {
      501  c = *p++ & 0xff;
      502  if ((c & 0xc0) != 0x80) {
      503  *lenp -= n + 1;
      504  mrb_raisef(mrb, E_ARGUMENT_ERROR, "malformed UTF-8 character");
      505  }
      506  else {
      507  c &= 0x3f;
      508  uv = uv << 6 | c;
      509  }
      510  }
      511  }
      512  n = *lenp - 1;
      513  if (uv < utf8_limits[n]) {
      514  mrb_raisef(mrb, E_ARGUMENT_ERROR, "redundant UTF-8 sequence");
      515  }
      516  return uv;
      517  }
      518
      519  static int
      520  unpack_utf8(mrb_state *mrb, const unsigned char * src, int srclen, mrb_value ary, unsigned int flags)
      521  {
      522  unsigned long uv;
      523  long lenp = srclen;
      524
      525  if (srclen == 0) {
      526  return 1;
      527  }
      528  uv = utf8_to_uv(mrb, (const char *)src, &lenp);
      529  mrb_ary_push(mrb, ary, mrb_fixnum_value((mrb_int)uv));
      530  return (int)lenp;
 449  531  }
 450  532
   …    …
 483  565  *dptr++ = pad;
 484  566  }
 485
      567
 486  568  return dptr - dptr0;
 487  569  }
   …    …
 542  624  slen = count;
 543  625  }
 544
      626
 545  627  dst = str_len_ensure(mrb, dst, didx + count);
 546  628  dptr = RSTRING_PTR(dst) + didx;
   …    …
1148 1230  srcidx += unpack_double(mrb, sptr, srclen - srcidx, result, flags);
1149 1231  break;
     1232  case PACK_DIR_UTF8:
     1233  srcidx += unpack_utf8(mrb, sptr, srclen - srcidx, result, flags);
     1234  break;
     1235  default:
     1236  mrb_raise(mrb, E_RUNTIME_ERROR, "mruby-pack's bug");
1150 1237  }
1151 1238  if (count > 0) {
-
EcnlProtoTool/trunk/mrbgems/mruby-pack/test/pack.rb
r321 r331
 146  146  assert_pack 'I', str, [12345]
 147  147  end
      148
      149  assert 'pack/unpack "U"' do
      150  assert_equal [], "".unpack("U")
      151  assert_equal [], "".unpack("U*")
      152  assert_equal [65, 66], "ABC".unpack("U2")
      153  assert_equal [12371, 12435, 12395, 12385, 12399, 19990, 30028], "こんにちは世界".unpack("U*")
      154
      155  assert_equal "", [].pack("U")
      156  assert_equal "", [].pack("U*")
      157  assert_equal "AB", [65, 66, 67].pack("U2")
      158  assert_equal "こんにちは世界", [12371, 12435, 12395, 12385, 12399, 19990, 30028].pack("U*")
      159
      160  assert_equal "\000", [0].pack("U")
      161
      162  assert_raise(RangeError) { [-0x40000000].pack("U") }
      163  assert_raise(RangeError) { [-1].pack("U") }
      164  assert_raise(RangeError) { [0x40000000].pack("U") }
      165  end
Note: See TracChangeset for help on using the changeset viewer.