Changeset 331 for EcnlProtoTool/trunk/tcc-0.9.27/x86_64-gen.c
- Timestamp:
- Jan 21, 2018, 12:10:09 AM (6 years ago)
- Location:
- EcnlProtoTool/trunk/tcc-0.9.27
- Files:
-
- 1 edited
- 1 moved
Legend:
- Unmodified
- Added
- Removed
-
EcnlProtoTool/trunk/tcc-0.9.27/x86_64-gen.c
r321 r331 24 24 25 25 /* number of available registers */ 26 #define NB_REGS 5 27 #define NB_ASM_REGS 8 26 #define NB_REGS 25 27 #define NB_ASM_REGS 16 28 #define CONFIG_TCC_ASM 28 29 29 30 /* a register can belong to several classes. The classes must be … … 35 36 #define RC_RCX 0x0008 36 37 #define RC_RDX 0x0010 38 #define RC_ST0 0x0080 /* only for long double */ 37 39 #define RC_R8 0x0100 38 40 #define RC_R9 0x0200 39 41 #define RC_R10 0x0400 40 42 #define RC_R11 0x0800 41 #define RC_XMM0 0x0020 42 #define RC_ST0 0x0040 /* only for long double */ 43 #define RC_XMM0 0x1000 44 #define RC_XMM1 0x2000 45 #define RC_XMM2 0x4000 46 #define RC_XMM3 0x8000 47 #define RC_XMM4 0x10000 48 #define RC_XMM5 0x20000 49 #define RC_XMM6 0x40000 50 #define RC_XMM7 0x80000 43 51 #define RC_IRET RC_RAX /* function return: integer register */ 44 52 #define RC_LRET RC_RDX /* function return: second integer register */ 45 53 #define RC_FRET RC_XMM0 /* function return: float register */ 54 #define RC_QRET RC_XMM1 /* function return: second float register */ 46 55 47 56 /* pretty names for the registers */ … … 50 59 TREG_RCX = 1, 51 60 TREG_RDX = 2, 52 TREG_XMM0 = 3, 53 TREG_ST0 = 4, 54 61 TREG_RSP = 4, 55 62 TREG_RSI = 6, 56 63 TREG_RDI = 7, 64 57 65 TREG_R8 = 8, 58 66 TREG_R9 = 9, 59 60 67 TREG_R10 = 10, 61 68 TREG_R11 = 11, 62 69 63 TREG_MEM = 0x10, 70 TREG_XMM0 = 16, 71 TREG_XMM1 = 17, 72 TREG_XMM2 = 18, 73 TREG_XMM3 = 19, 74 TREG_XMM4 = 20, 75 TREG_XMM5 = 21, 76 TREG_XMM6 = 22, 77 TREG_XMM7 = 23, 78 79 TREG_ST0 = 24, 80 81 TREG_MEM = 0x20 64 82 }; 65 83 … … 71 89 #define REG_LRET TREG_RDX /* second word return register (for long long) */ 72 90 #define REG_FRET TREG_XMM0 /* float return register */ 91 #define REG_QRET TREG_XMM1 /* second float return register */ 73 92 74 93 /* defined if function parameters must be evaluated in reverse order */ … … 80 99 /* long double size and alignment, in bytes */ 81 100 #define LDOUBLE_SIZE 16 82 #define LDOUBLE_ALIGN 8101 #define LDOUBLE_ALIGN 
16 83 102 /* maximum alignment (for aligned attribute support) */ 84 #define MAX_ALIGN 8 85 86 /******************************************************/ 87 /* ELF defines */ 88 89 #define EM_TCC_TARGET EM_X86_64 90 91 /* relocation type for 32 bit data relocation */ 92 #define R_DATA_32 R_X86_64_32 93 #define R_DATA_PTR R_X86_64_64 94 #define R_JMP_SLOT R_X86_64_JUMP_SLOT 95 #define R_COPY R_X86_64_COPY 96 97 #define ELF_START_ADDR 0x08048000 98 #define ELF_PAGE_SIZE 0x1000 103 #define MAX_ALIGN 16 99 104 100 105 /******************************************************/ … … 104 109 #include <assert.h> 105 110 106 ST_DATA const int reg_classes[NB_REGS +7] = {111 ST_DATA const int reg_classes[NB_REGS] = { 107 112 /* eax */ RC_INT | RC_RAX, 108 113 /* ecx */ RC_INT | RC_RCX, 109 114 /* edx */ RC_INT | RC_RDX, 110 /* xmm0 */ RC_FLOAT | RC_XMM0,111 /* st0 */ RC_ST0,112 115 0, 113 116 0, 114 117 0, 115 RC_INT | RC_R8, 116 RC_INT | RC_R9, 117 RC_INT | RC_R10, 118 RC_INT | RC_R11 118 0, 119 0, 120 RC_R8, 121 RC_R9, 122 RC_R10, 123 RC_R11, 124 0, 125 0, 126 0, 127 0, 128 /* xmm0 */ RC_FLOAT | RC_XMM0, 129 /* xmm1 */ RC_FLOAT | RC_XMM1, 130 /* xmm2 */ RC_FLOAT | RC_XMM2, 131 /* xmm3 */ RC_FLOAT | RC_XMM3, 132 /* xmm4 */ RC_FLOAT | RC_XMM4, 133 /* xmm5 */ RC_FLOAT | RC_XMM5, 134 /* xmm6 an xmm7 are included so gv() can be used on them, 135 but they are not tagged with RC_FLOAT because they are 136 callee saved on Windows */ 137 RC_XMM6, 138 RC_XMM7, 139 /* st0 */ RC_ST0 119 140 }; 120 141 … … 123 144 124 145 /* XXX: make it faster ? 
*/ 125 void g(int c)146 ST_FUNC void g(int c) 126 147 { 127 148 int ind1; 149 if (nocode_wanted) 150 return; 128 151 ind1 = ind + 1; 129 152 if (ind1 > cur_text_section->data_allocated) … … 133 156 } 134 157 135 void o(unsigned int c)158 ST_FUNC void o(unsigned int c) 136 159 { 137 160 while (c) { … … 141 164 } 142 165 143 void gen_le16(int v)166 ST_FUNC void gen_le16(int v) 144 167 { 145 168 g(v); … … 147 170 } 148 171 149 void gen_le32(int c)172 ST_FUNC void gen_le32(int c) 150 173 { 151 174 g(c); … … 155 178 } 156 179 157 void gen_le64(int64_t c)180 ST_FUNC void gen_le64(int64_t c) 158 181 { 159 182 g(c); … … 167 190 } 168 191 169 void orex(int ll, int r, int r2, int b)192 static void orex(int ll, int r, int r2, int b) 170 193 { 171 194 if ((r & VT_VALMASK) >= VT_CONST) … … 179 202 180 203 /* output a symbol and patch all calls to it */ 181 void gsym_addr(int t, int a) 182 { 183 int n, *ptr; 204 ST_FUNC void gsym_addr(int t, int a) 205 { 184 206 while (t) { 185 ptr = (int *)(cur_text_section->data + t);186 n = *ptr; /* next value */187 *ptr = a - t - 4;207 unsigned char *ptr = cur_text_section->data + t; 208 uint32_t n = read32le(ptr); /* next value */ 209 write32le(ptr, a - t - 4); 188 210 t = n; 189 211 } … … 195 217 } 196 218 197 /* psym is used to put an instruction with a data field which is a198 reference to a symbol. It is in fact the same as oad ! */199 #define psym oad200 219 201 220 static int is64_type(int t) … … 206 225 } 207 226 208 static int is_sse_float(int t) {209 int bt;210 bt = t & VT_BTYPE;211 return bt == VT_DOUBLE || bt == VT_FLOAT;212 }213 214 215 227 /* instruction + 4 bytes data. 
Return the address of the data */ 216 ST_FUNC int oad(int c, int s) 217 { 218 int ind1; 219 228 static int oad(int c, int s) 229 { 230 int t; 231 if (nocode_wanted) 232 return s; 220 233 o(c); 221 ind1 = ind + 4; 222 if (ind1 > cur_text_section->data_allocated) 223 section_realloc(cur_text_section, ind1); 224 *(int *)(cur_text_section->data + ind) = s; 225 s = ind; 226 ind = ind1; 227 return s; 228 } 234 t = ind; 235 gen_le32(s); 236 return t; 237 } 238 239 /* generate jmp to a label */ 240 #define gjmp2(instr,lbl) oad(instr,lbl) 229 241 230 242 ST_FUNC void gen_addr32(int r, Sym *sym, int c) 231 243 { 232 244 if (r & VT_SYM) 233 greloc (cur_text_section, sym, ind, R_X86_64_32);245 greloca(cur_text_section, sym, ind, R_X86_64_32S, c), c=0; 234 246 gen_le32(c); 235 247 } … … 239 251 { 240 252 if (r & VT_SYM) 241 greloc (cur_text_section, sym, ind, R_X86_64_64);253 greloca(cur_text_section, sym, ind, R_X86_64_64, c), c=0; 242 254 gen_le64(c); 243 255 } … … 247 259 { 248 260 if (r & VT_SYM) 249 greloc (cur_text_section, sym, ind, R_X86_64_PC32);261 greloca(cur_text_section, sym, ind, R_X86_64_PC32, c-4), c=4; 250 262 gen_le32(c-4); 251 263 } … … 254 266 static void gen_gotpcrel(int r, Sym *sym, int c) 255 267 { 256 #ifndef TCC_TARGET_PE 257 Section *sr; 258 ElfW(Rela) *rel; 259 greloc(cur_text_section, sym, ind, R_X86_64_GOTPCREL); 260 sr = cur_text_section->reloc; 261 rel = (ElfW(Rela) *)(sr->data + sr->data_offset - sizeof(ElfW(Rela))); 262 rel->r_addend = -4; 263 #else 264 printf("picpic: %s %x %x | %02x %02x %02x\n", get_tok_str(sym->v, NULL), c, r, 268 #ifdef TCC_TARGET_PE 269 tcc_error("internal error: no GOT on PE: %s %x %x | %02x %02x %02x\n", 270 get_tok_str(sym->v, NULL), c, r, 265 271 cur_text_section->data[ind-3], 266 272 cur_text_section->data[ind-2], 267 273 cur_text_section->data[ind-1] 268 274 ); 269 greloc(cur_text_section, sym, ind, R_X86_64_PC32);270 275 #endif 276 greloca(cur_text_section, sym, ind, R_X86_64_GOTPCREL, -4); 271 277 gen_le32(0); 272 
278 if (c) { … … 283 289 if ((r & VT_VALMASK) == VT_CONST) { 284 290 /* constant memory reference */ 285 o(0x05 | op_reg); 286 if (is_got) { 287 gen_gotpcrel(r, sym, c); 288 } else { 289 gen_addrpc32(r, sym, c); 290 } 291 if (!(r & VT_SYM)) { 292 /* Absolute memory reference */ 293 o(0x04 | op_reg); /* [sib] | destreg */ 294 oad(0x25, c); /* disp32 */ 295 } else { 296 o(0x05 | op_reg); /* (%rip)+disp32 | destreg */ 297 if (is_got) { 298 gen_gotpcrel(r, sym, c); 299 } else { 300 gen_addrpc32(r, sym, c); 301 } 302 } 291 303 } else if ((r & VT_VALMASK) == VT_LOCAL) { 292 304 /* currently, we use only ebp as base */ … … 310 322 } 311 323 312 /* generate a modrm reference. 'op_reg' contains the add tionnal 3324 /* generate a modrm reference. 'op_reg' contains the additional 3 313 325 opcode bits */ 314 326 static void gen_modrm(int op_reg, int r, Sym *sym, int c) … … 317 329 } 318 330 319 /* generate a modrm reference. 'op_reg' contains the add tionnal 3331 /* generate a modrm reference. 'op_reg' contains the additional 3 320 332 opcode bits */ 321 333 static void gen_modrm64(int opcode, int op_reg, int r, Sym *sym, int c) … … 340 352 341 353 fr = sv->r; 342 ft = sv->type.t; 343 fc = sv->c.ul; 354 ft = sv->type.t & ~VT_DEFSIGN; 355 fc = sv->c.i; 356 if (fc != sv->c.i && (fr & VT_SYM)) 357 tcc_error("64 bit addend in load"); 358 359 ft &= ~(VT_VOLATILE | VT_CONSTANT); 344 360 345 361 #ifndef TCC_TARGET_PE … … 366 382 v1.type.t = VT_PTR; 367 383 v1.r = VT_LOCAL | VT_LVAL; 368 v1.c. ul= fc;384 v1.c.i = fc; 369 385 fr = r; 370 if (!(reg_classes[fr] & RC_INT))386 if (!(reg_classes[fr] & (RC_INT|RC_R11))) 371 387 fr = get_reg(RC_INT); 372 388 load(fr, &v1); 373 389 } 390 if (fc != sv->c.i) { 391 /* If the addends doesn't fit into a 32bit signed 392 we must use a 64bit move. We've checked above 393 that this doesn't have a sym associated. 
*/ 394 v1.type.t = VT_LLONG; 395 v1.r = VT_CONST; 396 v1.c.i = sv->c.i; 397 fr = r; 398 if (!(reg_classes[fr] & (RC_INT|RC_R11))) 399 fr = get_reg(RC_INT); 400 load(fr, &v1); 401 fc = 0; 402 } 374 403 ll = 0; 404 /* Like GCC we can load from small enough properly sized 405 structs and unions as well. 406 XXX maybe move to generic operand handling, but should 407 occur only with asm, so tccasm.c might also be a better place */ 408 if ((ft & VT_BTYPE) == VT_STRUCT) { 409 int align; 410 switch (type_size(&sv->type, &align)) { 411 case 1: ft = VT_BYTE; break; 412 case 2: ft = VT_SHORT; break; 413 case 4: ft = VT_INT; break; 414 case 8: ft = VT_LLONG; break; 415 default: 416 tcc_error("invalid aggregate type for register load"); 417 break; 418 } 419 } 375 420 if ((ft & VT_BTYPE) == VT_FLOAT) { 376 b = 0x6e0f66, r = 0; /* movd */ 421 b = 0x6e0f66; 422 r = REG_VALUE(r); /* movd */ 377 423 } else if ((ft & VT_BTYPE) == VT_DOUBLE) { 378 b = 0x7e0ff3, r = 0; /* movq */ 424 b = 0x7e0ff3; /* movq */ 425 r = REG_VALUE(r); 379 426 } else if ((ft & VT_BTYPE) == VT_LDOUBLE) { 380 427 b = 0xdb, r = 5; /* fldt */ 381 } else if ((ft & VT_TYPE) == VT_BYTE ) {428 } else if ((ft & VT_TYPE) == VT_BYTE || (ft & VT_TYPE) == VT_BOOL) { 382 429 b = 0xbe0f; /* movsbl */ 383 430 } else if ((ft & VT_TYPE) == (VT_BYTE | VT_UNSIGNED)) { … … 388 435 b = 0xb70f; /* movzwl */ 389 436 } else { 437 assert(((ft & VT_BTYPE) == VT_INT) 438 || ((ft & VT_BTYPE) == VT_LLONG) 439 || ((ft & VT_BTYPE) == VT_PTR) 440 || ((ft & VT_BTYPE) == VT_FUNC) 441 ); 390 442 ll = is64_type(ft); 391 443 b = 0x8b; … … 417 469 } else if (is64_type(ft)) { 418 470 orex(1,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ 419 gen_le64(sv->c. 
ull);471 gen_le64(sv->c.i); 420 472 } else { 421 473 orex(0,r,0, 0xb8 + REG_VALUE(r)); /* mov $xx, r */ … … 451 503 oad(0xb8 + REG_VALUE(r), t ^ 1); /* mov $0, r */ 452 504 } else if (v != r) { 453 if (r == TREG_XMM0) { 454 assert(v == TREG_ST0); 455 /* gen_cvt_ftof(VT_DOUBLE); */ 456 o(0xf0245cdd); /* fstpl -0x10(%rsp) */ 457 /* movsd -0x10(%rsp),%xmm0 */ 458 o(0x44100ff2); 459 o(0xf024); 505 if ((r >= TREG_XMM0) && (r <= TREG_XMM7)) { 506 if (v == TREG_ST0) { 507 /* gen_cvt_ftof(VT_DOUBLE); */ 508 o(0xf0245cdd); /* fstpl -0x10(%rsp) */ 509 /* movsd -0x10(%rsp),%xmmN */ 510 o(0x100ff2); 511 o(0x44 + REG_VALUE(r)*8); /* %xmmN */ 512 o(0xf024); 513 } else { 514 assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); 515 if ((ft & VT_BTYPE) == VT_FLOAT) { 516 o(0x100ff3); 517 } else { 518 assert((ft & VT_BTYPE) == VT_DOUBLE); 519 o(0x100ff2); 520 } 521 o(0xc0 + REG_VALUE(v) + REG_VALUE(r)*8); 522 } 460 523 } else if (r == TREG_ST0) { 461 assert( v == TREG_XMM0);524 assert((v >= TREG_XMM0) && (v <= TREG_XMM7)); 462 525 /* gen_cvt_ftof(VT_LDOUBLE); */ 463 /* movsd %xmm0,-0x10(%rsp) */ 464 o(0x44110ff2); 526 /* movsd %xmmN,-0x10(%rsp) */ 527 o(0x110ff2); 528 o(0x44 + REG_VALUE(r)*8); /* %xmmN */ 465 529 o(0xf024); 466 530 o(0xf02444dd); /* fldl -0x10(%rsp) */ … … 486 550 #endif 487 551 552 fr = v->r & VT_VALMASK; 488 553 ft = v->type.t; 489 fc = v->c.ul; 490 fr = v->r & VT_VALMASK; 554 fc = v->c.i; 555 if (fc != v->c.i && (fr & VT_SYM)) 556 tcc_error("64 bit addend in store"); 557 ft &= ~(VT_VOLATILE | VT_CONSTANT); 491 558 bt = ft & VT_BTYPE; 492 559 … … 496 563 /* mov xx(%rip), %r11 */ 497 564 o(0x1d8b4c); 498 gen_gotpcrel(TREG_R11, v->sym, v->c. ul);565 gen_gotpcrel(TREG_R11, v->sym, v->c.i); 499 566 pic = is64_type(bt) ? 
0x49 : 0x41; 500 567 } … … 506 573 o(pic); 507 574 o(0x7e0f); /* movd */ 508 r = 0;575 r = REG_VALUE(r); 509 576 } else if (bt == VT_DOUBLE) { 510 577 o(0x66); 511 578 o(pic); 512 579 o(0xd60f); /* movq */ 513 r = 0;580 r = REG_VALUE(r); 514 581 } else if (bt == VT_LDOUBLE) { 515 582 o(0xc0d9); /* fld %st(0) */ … … 556 623 { 557 624 int r; 558 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST) { 625 if ((vtop->r & (VT_VALMASK | VT_LVAL)) == VT_CONST && 626 ((vtop->r & VT_SYM) || (vtop->c.i-4) == (int)(vtop->c.i-4))) { 559 627 /* constant case */ 560 628 if (vtop->r & VT_SYM) { 561 629 /* relocation case */ 562 greloc(cur_text_section, vtop->sym, 563 ind + 1, R_X86_64_PC32); 630 #ifdef TCC_TARGET_PE 631 greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PC32, (int)(vtop->c.i-4)); 632 #else 633 greloca(cur_text_section, vtop->sym, ind + 1, R_X86_64_PLT32, (int)(vtop->c.i-4)); 634 #endif 564 635 } else { 565 636 /* put an empty PC32 relocation */ 566 put_elf_reloc (symtab_section, cur_text_section,567 ind + 1, R_X86_64_PC32, 0 );568 } 569 oad(0xe8 + is_jmp, vtop->c.ul - 4); /* call/jmp im */637 put_elf_reloca(symtab_section, cur_text_section, 638 ind + 1, R_X86_64_PC32, 0, (int)(vtop->c.i-4)); 639 } 640 oad(0xe8 + is_jmp, 0); /* call/jmp im */ 570 641 } else { 571 642 /* otherwise, indirect call */ … … 578 649 } 579 650 651 #if defined(CONFIG_TCC_BCHECK) 652 #ifndef TCC_TARGET_PE 653 static addr_t func_bound_offset; 654 static unsigned long func_bound_ind; 655 #endif 656 657 static void gen_static_call(int v) 658 { 659 Sym *sym = external_global_sym(v, &func_old_type, 0); 660 oad(0xe8, 0); 661 greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4); 662 } 663 664 /* generate a bounded pointer addition */ 665 ST_FUNC void gen_bounded_ptr_add(void) 666 { 667 /* save all temporary registers */ 668 save_regs(0); 669 670 /* prepare fast x86_64 function call */ 671 gv(RC_RAX); 672 o(0xc68948); // mov %rax,%rsi ## second arg in %rsi, this must be size 673 vtop--; 674 
675 gv(RC_RAX); 676 o(0xc78948); // mov %rax,%rdi ## first arg in %rdi, this must be ptr 677 vtop--; 678 679 /* do a fast function call */ 680 gen_static_call(TOK___bound_ptr_add); 681 682 /* returned pointer is in rax */ 683 vtop++; 684 vtop->r = TREG_RAX | VT_BOUNDED; 685 686 687 /* relocation offset of the bounding function call point */ 688 vtop->c.i = (cur_text_section->reloc->data_offset - sizeof(ElfW(Rela))); 689 } 690 691 /* patch pointer addition in vtop so that pointer dereferencing is 692 also tested */ 693 ST_FUNC void gen_bounded_ptr_deref(void) 694 { 695 addr_t func; 696 int size, align; 697 ElfW(Rela) *rel; 698 Sym *sym; 699 700 size = 0; 701 /* XXX: put that code in generic part of tcc */ 702 if (!is_float(vtop->type.t)) { 703 if (vtop->r & VT_LVAL_BYTE) 704 size = 1; 705 else if (vtop->r & VT_LVAL_SHORT) 706 size = 2; 707 } 708 if (!size) 709 size = type_size(&vtop->type, &align); 710 switch(size) { 711 case 1: func = TOK___bound_ptr_indir1; break; 712 case 2: func = TOK___bound_ptr_indir2; break; 713 case 4: func = TOK___bound_ptr_indir4; break; 714 case 8: func = TOK___bound_ptr_indir8; break; 715 case 12: func = TOK___bound_ptr_indir12; break; 716 case 16: func = TOK___bound_ptr_indir16; break; 717 default: 718 tcc_error("unhandled size when dereferencing bounded pointer"); 719 func = 0; 720 break; 721 } 722 723 sym = external_global_sym(func, &func_old_type, 0); 724 if (!sym->c) 725 put_extern_sym(sym, NULL, 0, 0); 726 727 /* patch relocation */ 728 /* XXX: find a better solution ? 
*/ 729 730 rel = (ElfW(Rela) *)(cur_text_section->reloc->data + vtop->c.i); 731 rel->r_info = ELF64_R_INFO(sym->c, ELF64_R_TYPE(rel->r_info)); 732 } 733 #endif 734 580 735 #ifdef TCC_TARGET_PE 581 736 582 737 #define REGN 4 583 static const uint8_t arg_regs[ ] = {738 static const uint8_t arg_regs[REGN] = { 584 739 TREG_RCX, TREG_RDX, TREG_R8, TREG_R9 585 740 }; 586 741 587 static int func_scratch; 742 /* Prepare arguments in R10 and R11 rather than RCX and RDX 743 because gv() will not ever use these */ 744 static int arg_prepare_reg(int idx) { 745 if (idx == 0 || idx == 1) 746 /* idx=0: r10, idx=1: r11 */ 747 return idx + 10; 748 else 749 return arg_regs[idx]; 750 } 751 752 static int func_scratch, func_alloca; 588 753 589 754 /* Generate function call. The function address is pushed first, then … … 591 756 parameters and the function address. */ 592 757 593 void gen_offs_sp(int b, int r, int d)758 static void gen_offs_sp(int b, int r, int d) 594 759 { 595 760 orex(1,0,r & 0x100 ? 0 : r, b); … … 603 768 } 604 769 770 static int using_regs(int size) 771 { 772 return !(size > 8 || (size & (size - 1))); 773 } 774 775 /* Return the number of registers needed to return the struct, or 0 if 776 returning via struct pointer. 
*/ 777 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) 778 { 779 int size, align; 780 *ret_align = 1; // Never have to re-align return values for x86-64 781 *regsize = 8; 782 size = type_size(vt, &align); 783 if (!using_regs(size)) 784 return 0; 785 if (size == 8) 786 ret->t = VT_LLONG; 787 else if (size == 4) 788 ret->t = VT_INT; 789 else if (size == 2) 790 ret->t = VT_SHORT; 791 else 792 ret->t = VT_BYTE; 793 ret->ref = NULL; 794 return 1; 795 } 796 797 static int is_sse_float(int t) { 798 int bt; 799 bt = t & VT_BTYPE; 800 return bt == VT_DOUBLE || bt == VT_FLOAT; 801 } 802 803 static int gfunc_arg_size(CType *type) { 804 int align; 805 if (type->t & (VT_ARRAY|VT_BITFIELD)) 806 return 8; 807 return type_size(type, &align); 808 } 809 605 810 void gfunc_call(int nb_args) 606 811 { 607 int size, align, r, args_size, i, d, j, bt, struct_size;608 int nb_reg_args, gen_reg;609 610 nb_reg_args = nb_args;611 arg s_size = (nb_reg_args < REGN ? REGN : nb_reg_args) * PTR_SIZE;812 int size, r, args_size, i, d, bt, struct_size; 813 int arg; 814 815 args_size = (nb_args < REGN ? 
REGN : nb_args) * PTR_SIZE; 816 arg = nb_args; 612 817 613 818 /* for struct arguments, we need to call memcpy and the function … … 616 821 struct_size = args_size; 617 822 for(i = 0; i < nb_args; i++) { 618 SValue *sv = &vtop[-i]; 823 SValue *sv; 824 825 --arg; 826 sv = &vtop[-i]; 619 827 bt = (sv->type.t & VT_BTYPE); 828 size = gfunc_arg_size(&sv->type); 829 830 if (using_regs(size)) 831 continue; /* arguments smaller than 8 bytes passed in registers or on stack */ 832 620 833 if (bt == VT_STRUCT) { 621 size = type_size(&sv->type, &align);622 834 /* align to stack align size */ 623 835 size = (size + 15) & ~15; … … 632 844 vstore(); 633 845 --vtop; 634 635 846 } else if (bt == VT_LDOUBLE) { 636 637 847 gv(RC_ST0); 638 848 gen_offs_sp(0xdb, 0x107, struct_size); 639 849 struct_size += 16; 640 641 850 } 642 851 } … … 644 853 if (func_scratch < struct_size) 645 854 func_scratch = struct_size; 646 #if 1 647 for (i = 0; i < REGN; ++i) 648 save_reg(arg_regs[i]); 649 save_reg(TREG_RAX); 650 #endif 651 gen_reg = nb_reg_args; 855 856 arg = nb_args; 652 857 struct_size = args_size; 653 858 654 859 for(i = 0; i < nb_args; i++) { 860 --arg; 655 861 bt = (vtop->type.t & VT_BTYPE); 656 862 657 if (bt == VT_STRUCT || bt == VT_LDOUBLE) { 658 if (bt == VT_LDOUBLE) 659 size = 16; 660 else 661 size = type_size(&vtop->type, &align); 863 size = gfunc_arg_size(&vtop->type); 864 if (!using_regs(size)) { 662 865 /* align to stack align size */ 663 866 size = (size + 15) & ~15; 664 j = --gen_reg; 665 if (j >= REGN) { 666 d = TREG_RAX; 867 if (arg >= REGN) { 868 d = get_reg(RC_INT); 667 869 gen_offs_sp(0x8d, d, struct_size); 668 gen_offs_sp(0x89, d, j*8);870 gen_offs_sp(0x89, d, arg*8); 669 871 } else { 670 d = arg_ regs[j];872 d = arg_prepare_reg(arg); 671 873 gen_offs_sp(0x8d, d, struct_size); 672 874 } 673 875 struct_size += size; 674 675 } else if (is_sse_float(vtop->type.t)) { 676 gv(RC_FLOAT); /* only one float register */ 677 j = --gen_reg; 678 if (j >= REGN) { 679 /* movq %xmm0, 
j*8(%rsp) */ 680 gen_offs_sp(0xd60f66, 0x100, j*8); 876 } else { 877 if (is_sse_float(vtop->type.t)) { 878 if (tcc_state->nosse) 879 tcc_error("SSE disabled"); 880 gv(RC_XMM0); /* only use one float register */ 881 if (arg >= REGN) { 882 /* movq %xmm0, j*8(%rsp) */ 883 gen_offs_sp(0xd60f66, 0x100, arg*8); 884 } else { 885 /* movaps %xmm0, %xmmN */ 886 o(0x280f); 887 o(0xc0 + (arg << 3)); 888 d = arg_prepare_reg(arg); 889 /* mov %xmm0, %rxx */ 890 o(0x66); 891 orex(1,d,0, 0x7e0f); 892 o(0xc0 + REG_VALUE(d)); 893 } 681 894 } else { 682 /* movaps %xmm0, %xmmN */ 683 o(0x280f); 684 o(0xc0 + (j << 3)); 685 d = arg_regs[j]; 686 /* mov %xmm0, %rxx */ 687 o(0x66); 688 orex(1,d,0, 0x7e0f); 689 o(0xc0 + REG_VALUE(d)); 690 } 691 } else { 692 j = --gen_reg; 693 if (j >= REGN) { 895 if (bt == VT_STRUCT) { 896 vtop->type.ref = NULL; 897 vtop->type.t = size > 4 ? VT_LLONG : size > 2 ? VT_INT 898 : size > 1 ? VT_SHORT : VT_BYTE; 899 } 900 694 901 r = gv(RC_INT); 695 gen_offs_sp(0x89, r, j*8); 696 } else { 697 d = arg_regs[j]; 698 if (d < NB_REGS) { 699 gv(reg_classes[d] & ~RC_INT); 902 if (arg >= REGN) { 903 gen_offs_sp(0x89, r, arg*8); 700 904 } else { 701 r = gv(RC_INT); 702 if (d != r) { 703 orex(1,d,r, 0x89); 704 o(0xc0 + REG_VALUE(d) + REG_VALUE(r) * 8); 705 } 905 d = arg_prepare_reg(arg); 906 orex(1,d,r,0x89); /* mov */ 907 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); 706 908 } 707 708 909 } 709 910 } … … 711 912 } 712 913 save_regs(0); 914 915 /* Copy R10 and R11 into RCX and RDX, respectively */ 916 if (nb_args > 0) { 917 o(0xd1894c); /* mov %r10, %rcx */ 918 if (nb_args > 1) { 919 o(0xda894c); /* mov %r11, %rdx */ 920 } 921 } 922 713 923 gcall_or_jmp(0); 924 925 if ((vtop->r & VT_SYM) && vtop->sym->v == TOK_alloca) { 926 /* need to add the "func_scratch" area after alloca */ 927 o(0x0548), gen_le32(func_alloca), func_alloca = ind - 4; 928 } 929 930 /* other compilers don't clear the upper bits when returning char/short */ 931 bt = vtop->type.ref->type.t & (VT_BTYPE | 
VT_UNSIGNED); 932 if (bt == (VT_BYTE | VT_UNSIGNED)) 933 o(0xc0b60f); /* movzbl %al, %eax */ 934 else if (bt == VT_BYTE) 935 o(0xc0be0f); /* movsbl %al, %eax */ 936 else if (bt == VT_SHORT) 937 o(0x98); /* cwtl */ 938 else if (bt == (VT_SHORT | VT_UNSIGNED)) 939 o(0xc0b70f); /* movzbl %al, %eax */ 940 #if 0 /* handled in gen_cast() */ 941 else if (bt == VT_INT) 942 o(0x9848); /* cltq */ 943 else if (bt == (VT_INT | VT_UNSIGNED)) 944 o(0xc089); /* mov %eax,%eax */ 945 #endif 714 946 vtop--; 715 947 } … … 721 953 void gfunc_prolog(CType *func_type) 722 954 { 723 int addr, reg_param_index, bt ;955 int addr, reg_param_index, bt, size; 724 956 Sym *sym; 725 957 CType *type; … … 727 959 func_ret_sub = 0; 728 960 func_scratch = 0; 961 func_alloca = 0; 729 962 loc = 0; 730 963 … … 739 972 implicit pointer parameter */ 740 973 func_vt = sym->type; 741 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { 974 func_var = (sym->f.func_type == FUNC_ELLIPSIS); 975 size = gfunc_arg_size(&func_vt); 976 if (!using_regs(size)) { 742 977 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); 978 func_vc = addr; 743 979 reg_param_index++; 744 addr += PTR_SIZE;980 addr += 8; 745 981 } 746 982 … … 749 985 type = &sym->type; 750 986 bt = type->t & VT_BTYPE; 751 if (reg_param_index < REGN) { 752 /* save arguments passed by register */ 987 size = gfunc_arg_size(type); 988 if (!using_regs(size)) { 989 if (reg_param_index < REGN) { 990 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); 991 } 992 sym_push(sym->v & ~SYM_FIELD, type, VT_LLOCAL | VT_LVAL, addr); 993 } else { 994 if (reg_param_index < REGN) { 995 /* save arguments passed by register */ 996 if ((bt == VT_FLOAT) || (bt == VT_DOUBLE)) { 997 if (tcc_state->nosse) 998 tcc_error("SSE disabled"); 999 o(0xd60f66); /* movq */ 1000 gen_modrm(reg_param_index, VT_LOCAL, NULL, addr); 1001 } else { 1002 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); 1003 } 1004 } 1005 sym_push(sym->v & ~SYM_FIELD, type, 
VT_LOCAL | VT_LVAL, addr); 1006 } 1007 addr += 8; 1008 reg_param_index++; 1009 } 1010 1011 while (reg_param_index < REGN) { 1012 if (func_type->ref->f.func_type == FUNC_ELLIPSIS) { 753 1013 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr); 754 } 755 if (bt == VT_STRUCT || bt == VT_LDOUBLE) { 756 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL | VT_REF, addr); 757 } else { 758 sym_push(sym->v & ~SYM_FIELD, type, VT_LOCAL | VT_LVAL, addr); 1014 addr += 8; 759 1015 } 760 1016 reg_param_index++; 761 addr += PTR_SIZE;762 }763 764 while (reg_param_index < REGN) {765 if (func_type->ref->c == FUNC_ELLIPSIS)766 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, addr);767 reg_param_index++;768 addr += PTR_SIZE;769 1017 } 770 1018 } … … 787 1035 ind = func_sub_sp_offset - FUNC_PROLOG_SIZE; 788 1036 /* align local size to word & save local variables */ 1037 func_scratch = (func_scratch + 15) & -16; 789 1038 v = (func_scratch + -loc + 15) & -16; 790 1039 … … 792 1041 Sym *sym = external_global_sym(TOK___chkstk, &func_old_type, 0); 793 1042 oad(0xb8, v); /* mov stacksize, %eax */ 794 oad(0xe8, -4); /* call __chkstk, (does the stackframe too) */795 greloc (cur_text_section, sym, ind-4, R_X86_64_PC32);1043 oad(0xe8, 0); /* call __chkstk, (does the stackframe too) */ 1044 greloca(cur_text_section, sym, ind-4, R_X86_64_PC32, -4); 796 1045 o(0x90); /* fill for FUNC_PROLOG_SIZE = 11 bytes */ 797 1046 } else { … … 801 1050 } 802 1051 1052 /* add the "func_scratch" area after each alloca seen */ 1053 while (func_alloca) { 1054 unsigned char *ptr = cur_text_section->data + func_alloca; 1055 func_alloca = read32le(ptr); 1056 write32le(ptr, func_scratch); 1057 } 1058 803 1059 cur_text_section->data_offset = saved_ind; 804 1060 pe_add_unwind_data(ind, saved_ind, v); … … 818 1074 } 819 1075 1076 typedef enum X86_64_Mode { 1077 x86_64_mode_none, 1078 x86_64_mode_memory, 1079 x86_64_mode_integer, 1080 x86_64_mode_sse, 1081 x86_64_mode_x87 1082 } X86_64_Mode; 
1083 1084 static X86_64_Mode classify_x86_64_merge(X86_64_Mode a, X86_64_Mode b) 1085 { 1086 if (a == b) 1087 return a; 1088 else if (a == x86_64_mode_none) 1089 return b; 1090 else if (b == x86_64_mode_none) 1091 return a; 1092 else if ((a == x86_64_mode_memory) || (b == x86_64_mode_memory)) 1093 return x86_64_mode_memory; 1094 else if ((a == x86_64_mode_integer) || (b == x86_64_mode_integer)) 1095 return x86_64_mode_integer; 1096 else if ((a == x86_64_mode_x87) || (b == x86_64_mode_x87)) 1097 return x86_64_mode_memory; 1098 else 1099 return x86_64_mode_sse; 1100 } 1101 1102 static X86_64_Mode classify_x86_64_inner(CType *ty) 1103 { 1104 X86_64_Mode mode; 1105 Sym *f; 1106 1107 switch (ty->t & VT_BTYPE) { 1108 case VT_VOID: return x86_64_mode_none; 1109 1110 case VT_INT: 1111 case VT_BYTE: 1112 case VT_SHORT: 1113 case VT_LLONG: 1114 case VT_BOOL: 1115 case VT_PTR: 1116 case VT_FUNC: 1117 return x86_64_mode_integer; 1118 1119 case VT_FLOAT: 1120 case VT_DOUBLE: return x86_64_mode_sse; 1121 1122 case VT_LDOUBLE: return x86_64_mode_x87; 1123 1124 case VT_STRUCT: 1125 f = ty->ref; 1126 1127 mode = x86_64_mode_none; 1128 for (f = f->next; f; f = f->next) 1129 mode = classify_x86_64_merge(mode, classify_x86_64_inner(&f->type)); 1130 1131 return mode; 1132 } 1133 assert(0); 1134 return 0; 1135 } 1136 1137 static X86_64_Mode classify_x86_64_arg(CType *ty, CType *ret, int *psize, int *palign, int *reg_count) 1138 { 1139 X86_64_Mode mode; 1140 int size, align, ret_t = 0; 1141 1142 if (ty->t & (VT_BITFIELD|VT_ARRAY)) { 1143 *psize = 8; 1144 *palign = 8; 1145 *reg_count = 1; 1146 ret_t = ty->t; 1147 mode = x86_64_mode_integer; 1148 } else { 1149 size = type_size(ty, &align); 1150 *psize = (size + 7) & ~7; 1151 *palign = (align + 7) & ~7; 1152 1153 if (size > 16) { 1154 mode = x86_64_mode_memory; 1155 } else { 1156 mode = classify_x86_64_inner(ty); 1157 switch (mode) { 1158 case x86_64_mode_integer: 1159 if (size > 8) { 1160 *reg_count = 2; 1161 ret_t = VT_QLONG; 1162 } else 
{ 1163 *reg_count = 1; 1164 ret_t = (size > 4) ? VT_LLONG : VT_INT; 1165 } 1166 break; 1167 1168 case x86_64_mode_x87: 1169 *reg_count = 1; 1170 ret_t = VT_LDOUBLE; 1171 break; 1172 1173 case x86_64_mode_sse: 1174 if (size > 8) { 1175 *reg_count = 2; 1176 ret_t = VT_QFLOAT; 1177 } else { 1178 *reg_count = 1; 1179 ret_t = (size > 4) ? VT_DOUBLE : VT_FLOAT; 1180 } 1181 break; 1182 default: break; /* nothing to be done for x86_64_mode_memory and x86_64_mode_none*/ 1183 } 1184 } 1185 } 1186 1187 if (ret) { 1188 ret->ref = NULL; 1189 ret->t = ret_t; 1190 } 1191 1192 return mode; 1193 } 1194 1195 ST_FUNC int classify_x86_64_va_arg(CType *ty) 1196 { 1197 /* This definition must be synced with stdarg.h */ 1198 enum __va_arg_type { 1199 __va_gen_reg, __va_float_reg, __va_stack 1200 }; 1201 int size, align, reg_count; 1202 X86_64_Mode mode = classify_x86_64_arg(ty, NULL, &size, &align, ®_count); 1203 switch (mode) { 1204 default: return __va_stack; 1205 case x86_64_mode_integer: return __va_gen_reg; 1206 case x86_64_mode_sse: return __va_float_reg; 1207 } 1208 } 1209 1210 /* Return the number of registers needed to return the struct, or 0 if 1211 returning via struct pointer. */ 1212 ST_FUNC int gfunc_sret(CType *vt, int variadic, CType *ret, int *ret_align, int *regsize) 1213 { 1214 int size, align, reg_count; 1215 *ret_align = 1; // Never have to re-align return values for x86-64 1216 *regsize = 8; 1217 return (classify_x86_64_arg(vt, ret, &size, &align, ®_count) != x86_64_mode_memory); 1218 } 1219 820 1220 #define REGN 6 821 1221 static const uint8_t arg_regs[REGN] = { … … 823 1223 }; 824 1224 1225 static int arg_prepare_reg(int idx) { 1226 if (idx == 2 || idx == 3) 1227 /* idx=2: r10, idx=3: r11 */ 1228 return idx + 8; 1229 else 1230 return arg_regs[idx]; 1231 } 1232 825 1233 /* Generate function call. The function address is pushed first, then 826 1234 all the parameters in call order. 
This functions pops all the … … 828 1236 void gfunc_call(int nb_args) 829 1237 { 830 int size, align, r, args_size, i; 1238 X86_64_Mode mode; 1239 CType type; 1240 int size, align, r, args_size, stack_adjust, i, reg_count; 831 1241 int nb_reg_args = 0; 832 1242 int nb_sse_args = 0; 833 1243 int sse_reg, gen_reg; 834 835 /* calculate the number of integer/float arguments */ 836 args_size = 0; 837 for(i = 0; i < nb_args; i++) { 838 if ((vtop[-i].type.t & VT_BTYPE) == VT_STRUCT) { 839 args_size += type_size(&vtop[-i].type, &align); 840 args_size = (args_size + 7) & ~7; 841 } else if ((vtop[-i].type.t & VT_BTYPE) == VT_LDOUBLE) { 842 args_size += 16; 843 } else if (is_sse_float(vtop[-i].type.t)) { 844 nb_sse_args++; 845 if (nb_sse_args > 8) args_size += 8; 846 } else { 847 nb_reg_args++; 848 if (nb_reg_args > REGN) args_size += 8; 849 } 850 } 1244 char _onstack[nb_args], *onstack = _onstack; 1245 1246 /* calculate the number of integer/float register arguments, remember 1247 arguments to be passed via stack (in onstack[]), and also remember 1248 if we have to align the stack pointer to 16 (onstack[i] == 2). Needs 1249 to be done in a left-to-right pass over arguments. 
*/ 1250 stack_adjust = 0; 1251 for(i = nb_args - 1; i >= 0; i--) { 1252 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, ®_count); 1253 if (mode == x86_64_mode_sse && nb_sse_args + reg_count <= 8) { 1254 nb_sse_args += reg_count; 1255 onstack[i] = 0; 1256 } else if (mode == x86_64_mode_integer && nb_reg_args + reg_count <= REGN) { 1257 nb_reg_args += reg_count; 1258 onstack[i] = 0; 1259 } else if (mode == x86_64_mode_none) { 1260 onstack[i] = 0; 1261 } else { 1262 if (align == 16 && (stack_adjust &= 15)) { 1263 onstack[i] = 2; 1264 stack_adjust = 0; 1265 } else 1266 onstack[i] = 1; 1267 stack_adjust += size; 1268 } 1269 } 1270 1271 if (nb_sse_args && tcc_state->nosse) 1272 tcc_error("SSE disabled but floating point arguments passed"); 1273 1274 /* fetch cpu flag before generating any code */ 1275 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP) 1276 gv(RC_INT); 851 1277 852 1278 /* for struct arguments, we need to call memcpy and the function … … 855 1281 gen_reg = nb_reg_args; 856 1282 sse_reg = nb_sse_args; 857 858 /* adjust stack to align SSE boundary */ 859 if (args_size &= 15) { 860 /* fetch cpu flag before the following sub will change the value */ 861 if (vtop >= vstack && (vtop->r & VT_VALMASK) == VT_CMP) 862 gv(RC_INT); 863 864 args_size = 16 - args_size; 865 o(0x48); 866 oad(0xec81, args_size); /* sub $xxx, %rsp */ 867 } 868 869 for(i = 0; i < nb_args; i++) { 870 /* Swap argument to top, it will possibly be changed here, 871 and might use more temps. All arguments must remain on the 872 stack, so that get_reg can correctly evict some of them onto 873 stack. We could use also use a vrott(nb_args) at the end 874 of this loop, but this seems faster. 
*/ 875 SValue tmp = vtop[0]; 876 vtop[0] = vtop[-i]; 877 vtop[-i] = tmp; 878 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT) { 879 size = type_size(&vtop->type, &align); 880 /* align to stack align size */ 881 size = (size + 7) & ~7; 882 /* allocate the necessary size on stack */ 883 o(0x48); 884 oad(0xec81, size); /* sub $xxx, %rsp */ 885 /* generate structure store */ 886 r = get_reg(RC_INT); 887 orex(1, r, 0, 0x89); /* mov %rsp, r */ 888 o(0xe0 + REG_VALUE(r)); 889 vset(&vtop->type, r | VT_LVAL, 0); 890 vswap(); 891 vstore(); 892 args_size += size; 893 } else if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { 894 gv(RC_ST0); 895 size = LDOUBLE_SIZE; 896 oad(0xec8148, size); /* sub $xxx, %rsp */ 897 o(0x7cdb); /* fstpt 0(%rsp) */ 898 g(0x24); 899 g(0x00); 900 args_size += size; 901 } else if (is_sse_float(vtop->type.t)) { 902 int j = --sse_reg; 903 if (j >= 8) { 904 gv(RC_FLOAT); 905 o(0x50); /* push $rax */ 906 /* movq %xmm0, (%rsp) */ 907 o(0x04d60f66); 908 o(0x24); 909 args_size += 8; 910 } 911 } else { 912 int j = --gen_reg; 913 /* simple type */ 914 /* XXX: implicit cast ? */ 915 if (j >= REGN) { 916 r = gv(RC_INT); 917 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */ 918 args_size += 8; 919 } 920 } 921 922 /* And swap the argument back to it's original position. */ 923 tmp = vtop[0]; 924 vtop[0] = vtop[-i]; 925 vtop[-i] = tmp; 1283 args_size = 0; 1284 stack_adjust &= 15; 1285 for (i = 0; i < nb_args;) { 1286 mode = classify_x86_64_arg(&vtop[-i].type, NULL, &size, &align, &reg_count); 1287 if (!onstack[i]) { 1288 ++i; 1289 continue; 1290 } 1291 /* Possibly adjust stack to align SSE boundary. We're processing 1292 args from right to left while allocating happens left to right 1293 (stack grows down), so the adjustment needs to happen _after_ 1294 an argument that requires it. 
*/ 1295 if (stack_adjust) { 1296 o(0x50); /* push %rax; aka sub $8,%rsp */ 1297 args_size += 8; 1298 stack_adjust = 0; 1299 } 1300 if (onstack[i] == 2) 1301 stack_adjust = 1; 1302 1303 vrotb(i+1); 1304 1305 switch (vtop->type.t & VT_BTYPE) { 1306 case VT_STRUCT: 1307 /* allocate the necessary size on stack */ 1308 o(0x48); 1309 oad(0xec81, size); /* sub $xxx, %rsp */ 1310 /* generate structure store */ 1311 r = get_reg(RC_INT); 1312 orex(1, r, 0, 0x89); /* mov %rsp, r */ 1313 o(0xe0 + REG_VALUE(r)); 1314 vset(&vtop->type, r | VT_LVAL, 0); 1315 vswap(); 1316 vstore(); 1317 break; 1318 1319 case VT_LDOUBLE: 1320 gv(RC_ST0); 1321 oad(0xec8148, size); /* sub $xxx, %rsp */ 1322 o(0x7cdb); /* fstpt 0(%rsp) */ 1323 g(0x24); 1324 g(0x00); 1325 break; 1326 1327 case VT_FLOAT: 1328 case VT_DOUBLE: 1329 assert(mode == x86_64_mode_sse); 1330 r = gv(RC_FLOAT); 1331 o(0x50); /* push $rax */ 1332 /* movq %xmmN, (%rsp) */ 1333 o(0xd60f66); 1334 o(0x04 + REG_VALUE(r)*8); 1335 o(0x24); 1336 break; 1337 1338 default: 1339 assert(mode == x86_64_mode_integer); 1340 /* simple type */ 1341 /* XXX: implicit cast ? */ 1342 r = gv(RC_INT); 1343 orex(0,r,0,0x50 + REG_VALUE(r)); /* push r */ 1344 break; 1345 } 1346 args_size += size; 1347 1348 vpop(); 1349 --nb_args; 1350 onstack++; 926 1351 } 927 1352 … … 933 1358 may break these temporary registers. 
Let's use R10 and R11 934 1359 instead of them */ 935 gen_reg = nb_reg_args;936 sse_reg = nb_sse_args;1360 assert(gen_reg <= REGN); 1361 assert(sse_reg <= 8); 937 1362 for(i = 0; i < nb_args; i++) { 938 if ((vtop->type.t & VT_BTYPE) == VT_STRUCT || 939 (vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { 940 } else if (is_sse_float(vtop->type.t)) { 941 int j = --sse_reg; 942 if (j < 8) { 943 gv(RC_FLOAT); /* only one float register */ 944 /* movaps %xmm0, %xmmN */ 945 o(0x280f); 946 o(0xc0 + (sse_reg << 3)); 947 } 948 } else { 949 int j = --gen_reg; 1363 mode = classify_x86_64_arg(&vtop->type, &type, &size, &align, &reg_count); 1364 /* Alter stack entry type so that gv() knows how to treat it */ 1365 vtop->type = type; 1366 if (mode == x86_64_mode_sse) { 1367 if (reg_count == 2) { 1368 sse_reg -= 2; 1369 gv(RC_FRET); /* Use pair load into xmm0 & xmm1 */ 1370 if (sse_reg) { /* avoid redundant movaps %xmm0, %xmm0 */ 1371 /* movaps %xmm0, %xmmN */ 1372 o(0x280f); 1373 o(0xc0 + (sse_reg << 3)); 1374 /* movaps %xmm1, %xmmN */ 1375 o(0x280f); 1376 o(0xc1 + ((sse_reg+1) << 3)); 1377 } 1378 } else { 1379 assert(reg_count == 1); 1380 --sse_reg; 1381 /* Load directly to register */ 1382 gv(RC_XMM0 << sse_reg); 1383 } 1384 } else if (mode == x86_64_mode_integer) { 950 1385 /* simple type */ 951 1386 /* XXX: implicit cast ? 
*/ 952 if (j < REGN) { 953 int d = arg_regs[j]; 954 r = gv(RC_INT); 955 if (j == 2 || j == 3) 956 /* j=2: r10, j=3: r11 */ 957 d = j + 8; 958 orex(1,d,r,0x89); /* mov */ 959 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); 1387 int d; 1388 gen_reg -= reg_count; 1389 r = gv(RC_INT); 1390 d = arg_prepare_reg(gen_reg); 1391 orex(1,d,r,0x89); /* mov */ 1392 o(0xc0 + REG_VALUE(r) * 8 + REG_VALUE(d)); 1393 if (reg_count == 2) { 1394 d = arg_prepare_reg(gen_reg+1); 1395 orex(1,d,vtop->r2,0x89); /* mov */ 1396 o(0xc0 + REG_VALUE(vtop->r2) * 8 + REG_VALUE(d)); 960 1397 } 961 1398 } 962 1399 vtop--; 963 1400 } 1401 assert(gen_reg == 0); 1402 assert(sse_reg == 0); 964 1403 965 1404 /* We shouldn't have many operands on the stack anymore, but the … … 977 1416 } 978 1417 979 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */ 1418 if (vtop->type.ref->f.func_type != FUNC_NEW) /* implies FUNC_OLD or FUNC_ELLIPSIS */ 1419 oad(0xb8, nb_sse_args < 8 ? nb_sse_args : 8); /* mov nb_sse_args, %eax */ 980 1420 gcall_or_jmp(0); 981 1421 if (args_size) … … 995 1435 void gfunc_prolog(CType *func_type) 996 1436 { 997 int i, addr, align, size; 998 int param_index, param_addr, reg_param_index, sse_param_index; 1437 X86_64_Mode mode; 1438 int i, addr, align, size, reg_count; 1439 int param_addr = 0, reg_param_index, sse_param_index; 999 1440 Sym *sym; 1000 1441 CType *type; … … 1007 1448 func_ret_sub = 0; 1008 1449 1009 if ( func_type->ref->c== FUNC_ELLIPSIS) {1450 if (sym->f.func_type == FUNC_ELLIPSIS) { 1010 1451 int seen_reg_num, seen_sse_num, seen_stack_size; 1011 1452 seen_reg_num = seen_sse_num = 0; … … 1016 1457 while ((sym = sym->next) != NULL) { 1017 1458 type = &sym->type; 1018 if (is_sse_float(type->t)) {1019 if (seen_sse_num < 8) {1020 seen_sse_num++;1021 } else {1022 seen_stack_size += 8;1023 }1024 } else if ((type->t & VT_BTYPE) == VT_STRUCT) {1025 size = type_size(type, &align);1026 size = (size + 7) & ~7;1027 seen_stack_size += size;1028 } else if ((type->t & 
VT_BTYPE) == VT_LDOUBLE) { 1029 seen_stack_size += LDOUBLE_SIZE;1030 } else {1031 if (seen_reg_num < REGN) {1032 seen_reg_num++;1033 } else { 1034 seen_stack_size += 8;1035 }1459 mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count); 1460 switch (mode) { 1461 default: 1462 stack_arg: 1463 seen_stack_size = ((seen_stack_size + align - 1) & -align) + size; 1464 break; 1465 1466 case x86_64_mode_integer: 1467 if (seen_reg_num + reg_count > REGN) 1468 goto stack_arg; 1469 seen_reg_num += reg_count; 1470 break; 1471 1472 case x86_64_mode_sse: 1473 if (seen_sse_num + reg_count > 8) 1474 goto stack_arg; 1475 seen_sse_num += reg_count; 1476 break; 1036 1477 } 1037 1478 } … … 1051 1492 for (i = 0; i < 8; i++) { 1052 1493 loc -= 16; 1053 o(0xd60f66); /* movq */ 1054 gen_modrm(7 - i, VT_LOCAL, NULL, loc); 1494 if (!tcc_state->nosse) { 1495 o(0xd60f66); /* movq */ 1496 gen_modrm(7 - i, VT_LOCAL, NULL, loc); 1497 } 1055 1498 /* movq $0, loc+8(%rbp) */ 1056 1499 o(0x85c748); … … 1064 1507 1065 1508 sym = func_type->ref; 1066 param_index = 0;1067 1509 reg_param_index = 0; 1068 1510 sse_param_index = 0; … … 1071 1513 implicit pointer parameter */ 1072 1514 func_vt = sym->type; 1073 if ((func_vt.t & VT_BTYPE) == VT_STRUCT) { 1515 mode = classify_x86_64_arg(&func_vt, NULL, &size, &align, &reg_count); 1516 if (mode == x86_64_mode_memory) { 1074 1517 push_arg_reg(reg_param_index); 1075 param_addr = loc;1076 1077 1518 func_vc = loc; 1078 param_index++;1079 1519 reg_param_index++; 1080 1520 } … … 1082 1522 while ((sym = sym->next) != NULL) { 1083 1523 type = &sym->type; 1084 size = type_size(type, &align); 1085 size = (size + 7) & ~7; 1086 if (is_sse_float(type->t)) { 1087 if (sse_param_index < 8) { 1524 mode = classify_x86_64_arg(type, NULL, &size, &align, &reg_count); 1525 switch (mode) { 1526 case x86_64_mode_sse: 1527 if (tcc_state->nosse) 1528 tcc_error("SSE disabled but floating point arguments used"); 1529 if (sse_param_index + reg_count <= 8) { 1088 1530 /* save 
arguments 
passed by register */ 1089 loc -= 8; 1090 o(0xd60f66); /* movq */ 1091 gen_modrm(sse_param_index, VT_LOCAL, NULL, loc); 1531 loc -= reg_count * 8; 1092 1532 param_addr = loc; 1533 for (i = 0; i < reg_count; ++i) { 1534 o(0xd60f66); /* movq */ 1535 gen_modrm(sse_param_index, VT_LOCAL, NULL, param_addr + i*8); 1536 ++sse_param_index; 1537 } 1093 1538 } else { 1539 addr = (addr + align - 1) & -align; 1094 1540 param_addr = addr; 1095 1541 addr += size; 1096 1542 } 1097 sse_param_index++; 1098 1099 } else if ((type->t & VT_BTYPE) == VT_STRUCT || 1100 (type->t & VT_BTYPE) == VT_LDOUBLE) { 1543 break; 1544 1545 case x86_64_mode_memory: 1546 case x86_64_mode_x87: 1547 addr = (addr + align - 1) & -align; 1101 1548 param_addr = addr; 1102 1549 addr += size; 1103 } else { 1104 if (reg_param_index < REGN) { 1550 break; 1551 1552 case x86_64_mode_integer: { 1553 if (reg_param_index + reg_count <= REGN) { 1105 1554 /* save arguments passed by register */ 1106 push_arg_reg(reg_param_index);1555 loc -= reg_count * 8; 1107 1556 param_addr = loc; 1557 for (i = 0; i < reg_count; ++i) { 1558 gen_modrm64(0x89, arg_regs[reg_param_index], VT_LOCAL, NULL, param_addr + i*8); 1559 ++reg_param_index; 1560 } 1108 1561 } else { 1562 addr = (addr + align - 1) & -align; 1109 1563 param_addr = addr; 1110 addr += 8; 1111 } 1112 reg_param_index++; 1564 addr += size; 1565 } 1566 break; 1567 } 1568 default: break; /* nothing to be done for x86_64_mode_none */ 1113 1569 } 1114 1570 sym_push(sym->v & ~SYM_FIELD, type, 1115 1571 VT_LOCAL | VT_LVAL, param_addr); 1116 param_index++; 1117 } 1572 } 1573 1574 #ifdef CONFIG_TCC_BCHECK 1575 /* leave some room for bound checking code */ 1576 if (tcc_state->do_bounds_check) { 1577 func_bound_offset = lbounds_section->data_offset; 1578 func_bound_ind = ind; 1579 oad(0xb8, 0); /* lbound section pointer */ 1580 o(0xc78948); /* mov %rax,%rdi ## first arg in %rdi, this must be ptr */ 1581 oad(0xb8, 0); /* call to function */ 1582 } 1583 #endif 1118 1584 } 1119 1585 
… … 1123 1589 int v, saved_ind; 1124 1590 1591 #ifdef CONFIG_TCC_BCHECK 1592 if (tcc_state->do_bounds_check 1593 && func_bound_offset != lbounds_section->data_offset) 1594 { 1595 addr_t saved_ind; 1596 addr_t *bounds_ptr; 1597 Sym *sym_data; 1598 1599 /* add end of table info */ 1600 bounds_ptr = section_ptr_add(lbounds_section, sizeof(addr_t)); 1601 *bounds_ptr = 0; 1602 1603 /* generate bound local allocation */ 1604 sym_data = get_sym_ref(&char_pointer_type, lbounds_section, 1605 func_bound_offset, lbounds_section->data_offset); 1606 saved_ind = ind; 1607 ind = func_bound_ind; 1608 greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0); 1609 ind = ind + 5 + 3; 1610 gen_static_call(TOK___bound_local_new); 1611 ind = saved_ind; 1612 1613 /* generate bound check local freeing */ 1614 o(0x5250); /* save returned value, if any */ 1615 greloca(cur_text_section, sym_data, ind + 1, R_X86_64_64, 0); 1616 oad(0xb8, 0); /* mov xxx, %rax */ 1617 o(0xc78948); /* mov %rax,%rdi # first arg in %rdi, this must be ptr */ 1618 gen_static_call(TOK___bound_local_delete); 1619 o(0x585a); /* restore returned value, if any */ 1620 } 1621 #endif 1125 1622 o(0xc9); /* leave */ 1126 1623 if (func_ret_sub == 0) { … … 1146 1643 int gjmp(int t) 1147 1644 { 1148 return psym(0xe9, t);1645 return gjmp2(0xe9, t); 1149 1646 } 1150 1647 … … 1162 1659 } 1163 1660 1661 ST_FUNC void gtst_addr(int inv, int a) 1662 { 1663 int v = vtop->r & VT_VALMASK; 1664 if (v == VT_CMP) { 1665 inv ^= (vtop--)->c.i; 1666 a -= ind + 2; 1667 if (a == (char)a) { 1668 g(inv - 32); 1669 g(a); 1670 } else { 1671 g(0x0f); 1672 oad(inv - 16, a - 4); 1673 } 1674 } else if ((v & ~1) == VT_JMP) { 1675 if ((v & 1) != inv) { 1676 gjmp_addr(a); 1677 gsym(vtop->c.i); 1678 } else { 1679 gsym(vtop->c.i); 1680 o(0x05eb); 1681 gjmp_addr(a); 1682 } 1683 vtop--; 1684 } 1685 } 1686 1164 1687 /* generate a test. set 'inv' to invert test. 
Stack entry is popped */ 1165 int gtst(int inv, int t) 1166 { 1167 int v, *p; 1168 1169 v = vtop->r & VT_VALMASK; 1170 if (v == VT_CMP) { 1688 ST_FUNC int gtst(int inv, int t) 1689 { 1690 int v = vtop->r & VT_VALMASK; 1691 1692 if (nocode_wanted) { 1693 ; 1694 } else if (v == VT_CMP) { 1171 1695 /* fast case : can jump directly since flags are set */ 1172 1696 if (vtop->c.i & 0x100) … … 1180 1704 otherwise if unordered we don't want to jump. */ 1181 1705 vtop->c.i &= ~0x100; 1182 if (!inv == (vtop->c.i != TOK_NE))1706 if (inv == (vtop->c.i == TOK_NE)) 1183 1707 o(0x067a); /* jp +6 */ 1184 1708 else 1185 1709 { 1186 1710 g(0x0f); 1187 t = psym(0x8a, t); /* jp t */1711 t = gjmp2(0x8a, t); /* jp t */ 1188 1712 } 1189 1713 } 1190 1714 g(0x0f); 1191 t = psym((vtop->c.i - 16) ^ inv, t);1715 t = gjmp2((vtop->c.i - 16) ^ inv, t); 1192 1716 } else if (v == VT_JMP || v == VT_JMPI) { 1193 1717 /* && or || optimization */ 1194 1718 if ((v & 1) == inv) { 1195 1719 /* insert vtop->c jump list in t */ 1196 p = &vtop->c.i; 1197 while (*p != 0) 1198 p = (int *)(cur_text_section->data + *p); 1199 *p = t; 1200 t = vtop->c.i; 1720 uint32_t n1, n = vtop->c.i; 1721 if (n) { 1722 while ((n1 = read32le(cur_text_section->data + n))) 1723 n = n1; 1724 write32le(cur_text_section->data + n, t); 1725 t = vtop->c.i; 1726 } 1201 1727 } else { 1202 1728 t = gjmp(t); 1203 1729 gsym(vtop->c.i); 1204 }1205 } else {1206 if (is_float(vtop->type.t) ||1207 (vtop->type.t & VT_BTYPE) == VT_LLONG) {1208 vpushi(0);1209 gen_op(TOK_NE);1210 }1211 if ((vtop->r & (VT_VALMASK | VT_LVAL | VT_SYM)) == VT_CONST) {1212 /* constant jmp optimization */1213 if ((vtop->c.i != 0) != inv)1214 t = gjmp(t);1215 } else {1216 v = gv(RC_INT);1217 orex(0,v,v,0x85);1218 o(0xc0 + REG_VALUE(v) * 9);1219 g(0x0f);1220 t = psym(0x85 ^ inv, t);1221 1730 } 1222 1731 } … … 1240 1749 opc = 0; 1241 1750 gen_op8: 1242 if (cc && (!ll || (int)vtop->c. 
ll == vtop->c.ll)) {1751 if (cc && (!ll || (int)vtop->c.i == vtop->c.i)) { 1243 1752 /* constant case */ 1244 1753 vswap(); … … 1359 1868 1360 1869 /* generate a floating point operation 'v = t1 op t2' instruction. The 1361 two operands are guarante d to have the same floating point type */1870 two operands are guaranteed to have the same floating point type */ 1362 1871 /* XXX: need to use ST1 too */ 1363 1872 void gen_opf(int op) … … 1401 1910 if (swapped) 1402 1911 o(0xc9d9); /* fxch %st(1) */ 1403 o(0xe9da); /* fucompp */ 1912 if (op == TOK_EQ || op == TOK_NE) 1913 o(0xe9da); /* fucompp */ 1914 else 1915 o(0xd9de); /* fcompp */ 1404 1916 o(0xe0df); /* fnstsw %ax */ 1405 1917 if (op == TOK_EQ) { … … 1445 1957 } 1446 1958 ft = vtop->type.t; 1447 fc = vtop->c. ul;1959 fc = vtop->c.i; 1448 1960 o(0xde); /* fxxxp %st, %st(1) */ 1449 1961 o(0xc1 + (a << 3)); … … 1454 1966 /* if saved lvalue, then we must reload it */ 1455 1967 r = vtop->r; 1456 fc = vtop->c. ul;1968 fc = vtop->c.i; 1457 1969 if ((r & VT_VALMASK) == VT_LLOCAL) { 1458 1970 SValue v1; … … 1460 1972 v1.type.t = VT_PTR; 1461 1973 v1.r = VT_LOCAL | VT_LVAL; 1462 v1.c. 
ul= fc;1974 v1.c.i = fc; 1463 1975 load(r, &v1); 1464 1976 fc = 0; … … 1478 1990 1479 1991 if (swapped) { 1480 o(0x7e0ff3); /* movq */ 1481 gen_modrm(1, r, vtop->sym, fc); 1482 1483 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) { 1484 o(0x66); 1485 } 1486 o(0x2e0f); /* ucomisd %xmm0, %xmm1 */ 1487 o(0xc8); 1992 gv(RC_FLOAT); 1993 vswap(); 1994 } 1995 assert(!(vtop[-1].r & VT_LVAL)); 1996 1997 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) 1998 o(0x66); 1999 if (op == TOK_EQ || op == TOK_NE) 2000 o(0x2e0f); /* ucomisd */ 2001 else 2002 o(0x2f0f); /* comisd */ 2003 2004 if (vtop->r & VT_LVAL) { 2005 gen_modrm(vtop[-1].r, r, vtop->sym, fc); 1488 2006 } else { 1489 if ((vtop->type.t & VT_BTYPE) == VT_DOUBLE) { 1490 o(0x66); 1491 } 1492 o(0x2e0f); /* ucomisd */ 1493 gen_modrm(0, r, vtop->sym, fc); 2007 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); 1494 2008 } 1495 2009 … … 1498 2012 vtop->c.i = op | 0x100; 1499 2013 } else { 1500 /* no memory reference possible for long double operations */ 1501 if ((vtop->type.t & VT_BTYPE) == VT_LDOUBLE) { 1502 load(TREG_XMM0, vtop); 1503 swapped = !swapped; 1504 } 2014 assert((vtop->type.t & VT_BTYPE) != VT_LDOUBLE); 1505 2015 switch(op) { 1506 2016 default: … … 1519 2029 } 1520 2030 ft = vtop->type.t; 1521 fc = vtop->c.ul; 1522 if ((ft & VT_BTYPE) == VT_LDOUBLE) { 1523 o(0xde); /* fxxxp %st, %st(1) */ 1524 o(0xc1 + (a << 3)); 2031 fc = vtop->c.i; 2032 assert((ft & VT_BTYPE) != VT_LDOUBLE); 2033 2034 r = vtop->r; 2035 /* if saved lvalue, then we must reload it */ 2036 if ((vtop->r & VT_VALMASK) == VT_LLOCAL) { 2037 SValue v1; 2038 r = get_reg(RC_INT); 2039 v1.type.t = VT_PTR; 2040 v1.r = VT_LOCAL | VT_LVAL; 2041 v1.c.i = fc; 2042 load(r, &v1); 2043 fc = 0; 2044 } 2045 2046 assert(!(vtop[-1].r & VT_LVAL)); 2047 if (swapped) { 2048 assert(vtop->r & VT_LVAL); 2049 gv(RC_FLOAT); 2050 vswap(); 2051 } 2052 2053 if ((ft & VT_BTYPE) == VT_DOUBLE) { 2054 o(0xf2); 1525 2055 } else { 1526 /* if saved lvalue, then we must reload it */ 
1527 r = vtop->r; 1528 if ((r & VT_VALMASK) == VT_LLOCAL) { 1529 SValue v1; 1530 r = get_reg(RC_INT); 1531 v1.type.t = VT_PTR; 1532 v1.r = VT_LOCAL | VT_LVAL; 1533 v1.c.ul = fc; 1534 load(r, &v1); 1535 fc = 0; 1536 } 1537 if (swapped) { 1538 /* movq %xmm0,%xmm1 */ 1539 o(0x7e0ff3); 1540 o(0xc8); 1541 load(TREG_XMM0, vtop); 1542 /* subsd %xmm1,%xmm0 (f2 0f 5c c1) */ 1543 if ((ft & VT_BTYPE) == VT_DOUBLE) { 1544 o(0xf2); 1545 } else { 1546 o(0xf3); 1547 } 1548 o(0x0f); 1549 o(0x58 + a); 1550 o(0xc1); 1551 } else { 1552 if ((ft & VT_BTYPE) == VT_DOUBLE) { 1553 o(0xf2); 1554 } else { 1555 o(0xf3); 1556 } 1557 o(0x0f); 1558 o(0x58 + a); 1559 gen_modrm(0, r, vtop->sym, fc); 1560 } 1561 } 2056 o(0xf3); 2057 } 2058 o(0x0f); 2059 o(0x58 + a); 2060 2061 if (vtop->r & VT_LVAL) { 2062 gen_modrm(vtop[-1].r, r, vtop->sym, fc); 2063 } else { 2064 o(0xc0 + REG_VALUE(vtop[0].r) + REG_VALUE(vtop[-1].r)*8); 2065 } 2066 1562 2067 vtop--; 1563 2068 } … … 1594 2099 vtop->r = TREG_ST0; 1595 2100 } else { 1596 save_reg(TREG_XMM0);2101 int r = get_reg(RC_FLOAT); 1597 2102 gv(RC_INT); 1598 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT ));2103 o(0xf2 + ((t & VT_BTYPE) == VT_FLOAT?1:0)); 1599 2104 if ((vtop->type.t & (VT_BTYPE | VT_UNSIGNED)) == 1600 2105 (VT_INT | VT_UNSIGNED) || … … 1603 2108 } 1604 2109 o(0x2a0f); 1605 o(0xc0 + (vtop->r & VT_VALMASK) ); /* cvtsi2sd */1606 vtop->r = TREG_XMM0;2110 o(0xc0 + (vtop->r & VT_VALMASK) + REG_VALUE(r)*8); /* cvtsi2sd */ 2111 vtop->r = r; 1607 2112 } 1608 2113 } … … 1616 2121 bt = ft & VT_BTYPE; 1617 2122 tbt = t & VT_BTYPE; 1618 2123 1619 2124 if (bt == VT_FLOAT) { 1620 2125 gv(RC_FLOAT); 1621 2126 if (tbt == VT_DOUBLE) { 1622 o(0xc0140f); /* unpcklps */ 1623 o(0xc05a0f); /* cvtps2pd */ 2127 o(0x140f); /* unpcklps */ 2128 o(0xc0 + REG_VALUE(vtop->r)*9); 2129 o(0x5a0f); /* cvtps2pd */ 2130 o(0xc0 + REG_VALUE(vtop->r)*9); 1624 2131 } else if (tbt == VT_LDOUBLE) { 2132 save_reg(RC_ST0); 1625 2133 /* movss %xmm0,-0x10(%rsp) */ 1626 o(0x44110ff3); 2134 
o(0x110ff3); 2135 o(0x44 + REG_VALUE(vtop->r)*8); 1627 2136 o(0xf024); 1628 2137 o(0xf02444d9); /* flds -0x10(%rsp) */ … … 1632 2141 gv(RC_FLOAT); 1633 2142 if (tbt == VT_FLOAT) { 1634 o(0xc0140f66); /* unpcklpd */ 1635 o(0xc05a0f66); /* cvtpd2ps */ 2143 o(0x140f66); /* unpcklpd */ 2144 o(0xc0 + REG_VALUE(vtop->r)*9); 2145 o(0x5a0f66); /* cvtpd2ps */ 2146 o(0xc0 + REG_VALUE(vtop->r)*9); 1636 2147 } else if (tbt == VT_LDOUBLE) { 2148 save_reg(RC_ST0); 1637 2149 /* movsd %xmm0,-0x10(%rsp) */ 1638 o(0x44110ff2); 2150 o(0x110ff2); 2151 o(0x44 + REG_VALUE(vtop->r)*8); 1639 2152 o(0xf024); 1640 2153 o(0xf02444dd); /* fldl -0x10(%rsp) */ … … 1642 2155 } 1643 2156 } else { 2157 int r; 1644 2158 gv(RC_ST0); 2159 r = get_reg(RC_FLOAT); 1645 2160 if (tbt == VT_DOUBLE) { 1646 2161 o(0xf0245cdd); /* fstpl -0x10(%rsp) */ 1647 2162 /* movsd -0x10(%rsp),%xmm0 */ 1648 o(0x44100ff2); 2163 o(0x100ff2); 2164 o(0x44 + REG_VALUE(r)*8); 1649 2165 o(0xf024); 1650 vtop->r = TREG_XMM0;2166 vtop->r = r; 1651 2167 } else if (tbt == VT_FLOAT) { 1652 2168 o(0xf0245cd9); /* fstps -0x10(%rsp) */ 1653 2169 /* movss -0x10(%rsp),%xmm0 */ 1654 o(0x44100ff3); 2170 o(0x100ff3); 2171 o(0x44 + REG_VALUE(r)*8); 1655 2172 o(0xf024); 1656 vtop->r = TREG_XMM0;2173 vtop->r = r; 1657 2174 } 1658 2175 } … … 1685 2202 } 1686 2203 orex(size == 8, r, 0, 0x2c0f); /* cvttss2si or cvttsd2si */ 1687 o(0xc0 + (REG_VALUE(r) << 3));2204 o(0xc0 + REG_VALUE(vtop->r) + REG_VALUE(r)*8); 1688 2205 vtop->r = r; 1689 2206 } … … 1695 2212 vtop--; 1696 2213 } 2214 2215 /* Save the stack pointer onto the stack and return the location of its address */ 2216 ST_FUNC void gen_vla_sp_save(int addr) { 2217 /* mov %rsp,addr(%rbp)*/ 2218 gen_modrm64(0x89, TREG_RSP, VT_LOCAL, NULL, addr); 2219 } 2220 2221 /* Restore the SP from a location on the stack */ 2222 ST_FUNC void gen_vla_sp_restore(int addr) { 2223 gen_modrm64(0x8b, TREG_RSP, VT_LOCAL, NULL, addr); 2224 } 2225 2226 #ifdef TCC_TARGET_PE 2227 /* Save result of gen_vla_alloc onto 
the stack */ 2228 ST_FUNC void gen_vla_result(int addr) { 2229 /* mov %rax,addr(%rbp)*/ 2230 gen_modrm64(0x89, TREG_RAX, VT_LOCAL, NULL, addr); 2231 } 2232 #endif 2233 2234 /* Subtract from the stack pointer, and push the resulting value onto the stack */ 2235 ST_FUNC void gen_vla_alloc(CType *type, int align) { 2236 #ifdef TCC_TARGET_PE 2237 /* alloca does more than just adjust %rsp on Windows */ 2238 vpush_global_sym(&func_old_type, TOK_alloca); 2239 vswap(); /* Move alloca ref past allocation size */ 2240 gfunc_call(1); 2241 #else 2242 int r; 2243 r = gv(RC_INT); /* allocation size */ 2244 /* sub r,%rsp */ 2245 o(0x2b48); 2246 o(0xe0 | REG_VALUE(r)); 2247 /* We align to 16 bytes rather than align */ 2248 /* and ~15, %rsp */ 2249 o(0xf0e48348); 2250 vpop(); 2251 #endif 2252 } 2253 1697 2254 1698 2255 /* end of x86-64 code generator */
Note:
See TracChangeset
for help on using the changeset viewer.