//===-- AMDGPUInstructions.td - Common instruction defs ---*- tablegen -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains instruction defs that are common to all hw codegen
// targets.
//
//===----------------------------------------------------------------------===//

// Root class for the instruction records in this file.  The template
// arguments are forwarded to the operand lists, assembly string and selection
// pattern of Instruction; the two isRegister* bits are copied into TSFlags.
class AMDGPUInst<dag outs, dag ins, string asm = "", list<dag> pattern = []>
    : Instruction {
  // Both flags default to 0; RegisterLoad / RegisterStore records set them.
  field bit isRegisterLoad = 0;
  field bit isRegisterStore = 0;

  let Namespace = "AMDGPU";
  let OutOperandList = outs;
  let InOperandList = ins;
  let AsmString = asm;
  let Pattern = pattern;
  let Itinerary = NullALU;

  // SoftFail is a field the disassembler can use to provide a way for
  // instructions to not match without killing the whole decode process.  It is
  // mainly used for ARM, but Tablegen expects this field to exist or it fails
  // to build the decode table.
  field bits<64> SoftFail = 0;

  // Decoder tables are keyed by the same name as the instruction namespace.
  let DecoderNamespace = Namespace;

  // Bit 63 carries isRegisterLoad, bit 62 carries isRegisterStore.
  let TSFlags{63} = isRegisterLoad;
  let TSFlags{62} = isRegisterStore;
}

// An AMDGPUInst that additionally carries a 32-bit Inst field, initialised to
// all ones.
class AMDGPUShaderInst<dag outs, dag ins, string asm = "",
                       list<dag> pattern = []>
    : AMDGPUInst<outs, ins, asm, pattern> {

  field bits<32> Inst = 0xffffffff;
}

// Predicates evaluated against the subtarget / target machine options.
def FP16Denormals : Predicate<"Subtarget.hasFP16Denormals()">;
def FP32Denormals : Predicate<"Subtarget.hasFP32Denormals()">;
def FP64Denormals : Predicate<"Subtarget.hasFP64Denormals()">;
def UnsafeFPMath  : Predicate<"TM.Options.UnsafeFPMath">;

// i32 operand whose default value is the constant 0.
def InstFlag : OperandWithDefaultOps<i32, (ops (i32 0))>;

// Two-operand pointer complex pattern selected by SelectADDRIndirect.
def ADDRIndirect : ComplexPattern<iPTR, 2, "SelectADDRIndirect", [], []>;

// Unsigned immediate operands; each width has its own print method.
let OperandType = "OPERAND_IMMEDIATE" in {

def u32imm : Operand<i32> {
  let PrintMethod = "printU32ImmOperand";
}

def u16imm : Operand<i16> {
  let PrintMethod = "printU16ImmOperand";
}

def u8imm : Operand<i8> {
  let PrintMethod = "printU8ImmOperand";
}

} // End OperandType = "OPERAND_IMMEDIATE"

//===--------------------------------------------------------------------===//
// Custom Operands
//===--------------------------------------------------------------------===//
def brtarget : Operand<OtherVT>;

//===----------------------------------------------------------------------===//
// PatLeafs for floating-point comparisons
//===----------------------------------------------------------------------===//

// Each leaf below accepts the ordered condition code as well as the
// "don't care about ordering" form of the same comparison.
def COND_OEQ : PatLeaf<(cond), [{
  return N->get() == ISD::SETOEQ || N->get() == ISD::SETEQ;
}]>;

def COND_ONE : PatLeaf<(cond), [{
  return N->get() == ISD::SETONE || N->get() == ISD::SETNE;
}]>;

def COND_OGT : PatLeaf<(cond), [{
  return N->get() == ISD::SETOGT || N->get() == ISD::SETGT;
}]>;

def COND_OGE : PatLeaf<(cond), [{
  return N->get() == ISD::SETOGE || N->get() == ISD::SETGE;
}]>;

def COND_OLT : PatLeaf<(cond), [{
  return N->get() == ISD::SETOLT || N->get() == ISD::SETLT;
}]>;

def COND_OLE : PatLeaf<(cond), [{
  return N->get() == ISD::SETOLE || N->get() == ISD::SETLE;
}]>;


// Leaves matching exactly SETO / SETUO.
def COND_O  : PatLeaf<(cond), [{return N->get() == ISD::SETO;}]>;
def COND_UO : PatLeaf<(cond), [{return N->get() == ISD::SETUO;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for unsigned / unordered comparisons
//===----------------------------------------------------------------------===//

// Each leaf matches exactly one SETU* condition code.
def COND_UEQ : PatLeaf<(cond), [{return N->get() == ISD::SETUEQ;}]>;
def COND_UNE : PatLeaf<(cond), [{return N->get() == ISD::SETUNE;}]>;
def COND_UGT : PatLeaf<(cond), [{return N->get() == ISD::SETUGT;}]>;
def COND_UGE : PatLeaf<(cond), [{return N->get() == ISD::SETUGE;}]>;
def COND_ULT : PatLeaf<(cond), [{return N->get() == ISD::SETULT;}]>;
def COND_ULE : PatLeaf<(cond), [{return N->get() == ISD::SETULE;}]>;

// XXX - For some reason R600 version is preferring to use unordered
// for setne?
// Matches either the unordered or the ordering-agnostic "not equal" code.
def COND_UNE_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETUNE || N->get() == ISD::SETNE;}]
>;

//===----------------------------------------------------------------------===//
// PatLeafs for signed comparisons
//===----------------------------------------------------------------------===//

def COND_SGT : PatLeaf <(cond), [{return N->get() == ISD::SETGT;}]>;
def COND_SGE : PatLeaf <(cond), [{return N->get() == ISD::SETGE;}]>;
def COND_SLT : PatLeaf <(cond), [{return N->get() == ISD::SETLT;}]>;
def COND_SLE : PatLeaf <(cond), [{return N->get() == ISD::SETLE;}]>;

//===----------------------------------------------------------------------===//
// PatLeafs for integer equality
//===----------------------------------------------------------------------===//

def COND_EQ : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETEQ || N->get() == ISD::SETUEQ;}]
>;

def COND_NE : PatLeaf <
  (cond),
  [{return N->get() == ISD::SETNE || N->get() == ISD::SETUNE;}]
>;

// Placeholder leaf whose predicate always fails, so it never matches.
def COND_NULL : PatLeaf <
  (cond),
  [{(void)N; return false;}]
>;


//===----------------------------------------------------------------------===//
// Misc. PatFrags
//===----------------------------------------------------------------------===//

// Wraps a binary operator so that it only matches when the node has a single
// use.
class HasOneUseBinOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1),
  (op $src0, $src1),
  [{ return N->hasOneUse(); }]
>;

// Ternary counterpart of HasOneUseBinOp.
class HasOneUseTernaryOp<SDPatternOperator op> : PatFrag<
  (ops node:$src0, node:$src1, node:$src2),
  (op $src0, $src1, $src2),
  [{ return N->hasOneUse(); }]
>;

//===----------------------------------------------------------------------===//
// Load/Store Pattern Fragments
//===----------------------------------------------------------------------===//

// The *MemOp classes below restrict a memory fragment to a single AMDGPUAS
// address space; the *Load / *Store classes fix the operand shape on top.

class PrivateMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS;
}]>;

class PrivateLoad <SDPatternOperator op> : PrivateMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

class PrivateStore <SDPatternOperator op> : PrivateMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def load_private : PrivateLoad <load>;

def truncstorei8_private : PrivateStore <truncstorei8>;
def truncstorei16_private : PrivateStore <truncstorei16>;
def store_private : PrivateStore <store>;

class GlobalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// Global address space loads
class GlobalLoad <SDPatternOperator op> : GlobalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def global_load : GlobalLoad <load>;

// Global address space stores
class GlobalStore <SDPatternOperator op> : GlobalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

def global_store : GlobalStore <store>;
def global_store_atomic : GlobalStore<atomic_store>;


class ConstantMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}]>;

// Constant address space loads
class ConstantLoad <SDPatternOperator op> : ConstantMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

def constant_load : ConstantLoad<load>;

class LocalMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;

// Local address space loads
class LocalLoad <SDPatternOperator op> : LocalMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

// Local address space stores
class LocalStore <SDPatternOperator op> : LocalMemOp <
  (ops node:$value, node:$ptr), (op node:$value, node:$ptr)
>;

// Flat address space memory operations.  The predicate uses the same
// MemSDNode::getAddressSpace() accessor as every other *MemOp class.
class FlatMemOp <dag ops, dag frag> : PatFrag <ops, frag, [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::FLAT_ADDRESS;
}]>;

// Flat address space loads
class FlatLoad <SDPatternOperator op> : FlatMemOp <
  (ops node:$ptr), (op node:$ptr)
>;

// Matches a load whose extension type is either ZEXTLOAD or EXTLOAD.
class AZExtLoadBase <SDPatternOperator ld_node>: PatFrag<(ops node:$ptr),
                                              (ld_node node:$ptr), [{
  LoadSDNode *L = cast<LoadSDNode>(N);
  return L->getExtensionType() == ISD::ZEXTLOAD ||
         L->getExtensionType() == ISD::EXTLOAD;
}]>;

def az_extload : AZExtLoadBase <unindexedload>;

// az_extload narrowed to an i8 memory type.
def az_extloadi8 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i8;
}]>;

def az_extloadi8_global : GlobalLoad <az_extloadi8>;
def sextloadi8_global : GlobalLoad <sextloadi8>;

def az_extloadi8_constant : ConstantLoad <az_extloadi8>;
def sextloadi8_constant : ConstantLoad <sextloadi8>;

def az_extloadi8_local : LocalLoad <az_extloadi8>;
def sextloadi8_local : LocalLoad <sextloadi8>;

def extloadi8_private : PrivateLoad <az_extloadi8>;
def sextloadi8_private : PrivateLoad <sextloadi8>;

// az_extload narrowed to an i16 memory type.
def az_extloadi16 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i16;
}]>;

def az_extloadi16_global : GlobalLoad <az_extloadi16>;
def sextloadi16_global : GlobalLoad <sextloadi16>;

def az_extloadi16_constant : ConstantLoad <az_extloadi16>;
def sextloadi16_constant : ConstantLoad <sextloadi16>;

def az_extloadi16_local : LocalLoad <az_extloadi16>;
def sextloadi16_local : LocalLoad <sextloadi16>;

def extloadi16_private : PrivateLoad <az_extloadi16>;
def sextloadi16_private : PrivateLoad <sextloadi16>;

// az_extload narrowed to an i32 memory type.
def az_extloadi32 : PatFrag<(ops node:$ptr), (az_extload node:$ptr), [{
  return cast<LoadSDNode>(N)->getMemoryVT() == MVT::i32;
}]>;

def az_extloadi32_global : GlobalLoad <az_extloadi32>;

def az_extloadi32_flat : FlatLoad <az_extloadi32>;

def az_extloadi32_constant : ConstantLoad <az_extloadi32>;

def truncstorei8_global : GlobalStore <truncstorei8>;
def truncstorei16_global : GlobalStore <truncstorei16>;

def local_store : LocalStore <store>;
def truncstorei8_local : LocalStore <truncstorei8>;
def truncstorei16_local : LocalStore <truncstorei16>;

def local_load : LocalLoad <load>;

// Restricts a memory fragment to accesses whose alignment is a multiple of 8.
class Aligned8Bytes <dag ops, dag frag> : PatFrag <ops, frag, [{
    return cast<MemSDNode>(N)->getAlignment() % 8 == 0;
}]>;

def local_load_aligned8bytes : Aligned8Bytes <
  (ops node:$ptr), (local_load node:$ptr)
>;

def local_store_aligned8bytes : Aligned8Bytes <
  (ops node:$val, node:$ptr), (local_store node:$val, node:$ptr)
>;

// Binary atomic read-modify-write restricted to the local address space.
class local_binary_atomic_op<SDNode atomic_op> :
  PatFrag<(ops node:$ptr, node:$value),
    (atomic_op node:$ptr, node:$value), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}]>;


def atomic_swap_local : local_binary_atomic_op<atomic_swap>;
def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>;
def atomic_load_sub_local : local_binary_atomic_op<atomic_load_sub>;
def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>;
def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>;
def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>;
def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>;
def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>;
def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>;
def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>;
def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>;

// AMDGPUstore_mskor restricted to the global address space.
def mskor_global : PatFrag<(ops node:$val, node:$ptr),
                            (AMDGPUstore_mskor node:$val, node:$ptr), [{
  return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}]>;

// Defines <name>_32_local and <name>_64_local: a compare-and-swap node with an
// i32 / i64 memory type in the local address space.
multiclass AtomicCmpSwapLocal <SDNode cmp_swap_node> {

  def _32_local : PatFrag <
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i32 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;

  def _64_local : PatFrag<
    (ops node:$ptr, node:$cmp, node:$swap),
    (cmp_swap_node node:$ptr, node:$cmp, node:$swap), [{
      AtomicSDNode *AN = cast<AtomicSDNode>(N);
      return AN->getMemoryVT() == MVT::i64 &&
             AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
  }]>;
}

defm atomic_cmp_swap : AtomicCmpSwapLocal <atomic_cmp_swap>;

// Defines three global-address-space fragments per atomic operator:
//   <name>        - matches regardless of whether the result is used,
//   <name>_noret  - matches only when result value 0 has no uses,
//   <name>_ret    - matches only when result value 0 has at least one use.
multiclass global_binary_atomic_op<SDNode atomic_op> {
  def "" : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;

  def _noret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

  def _ret : PatFrag<
        (ops node:$ptr, node:$value),
        (atomic_op node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;
}

defm atomic_swap_global : global_binary_atomic_op<atomic_swap>;
defm atomic_add_global : global_binary_atomic_op<atomic_load_add>;
defm atomic_and_global : global_binary_atomic_op<atomic_load_and>;
defm atomic_max_global : global_binary_atomic_op<atomic_load_max>;
defm atomic_min_global : global_binary_atomic_op<atomic_load_min>;
defm atomic_or_global : global_binary_atomic_op<atomic_load_or>;
defm atomic_sub_global : global_binary_atomic_op<atomic_load_sub>;
defm atomic_umax_global : global_binary_atomic_op<atomic_load_umax>;
defm atomic_umin_global : global_binary_atomic_op<atomic_load_umin>;
defm atomic_xor_global : global_binary_atomic_op<atomic_load_xor>;

//legacy
def AMDGPUatomic_cmp_swap_global : PatFrag<
        (ops node:$ptr, node:$value),
        (AMDGPUatomic_cmp_swap node:$ptr, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;

// Global compare-and-swap, with the same plain / _noret / _ret split as
// global_binary_atomic_op.
def atomic_cmp_swap_global : PatFrag<
        (ops node:$ptr, node:$cmp, node:$value),
        (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;}]>;

def atomic_cmp_swap_global_noret : PatFrag<
        (ops node:$ptr, node:$cmp, node:$value),
        (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (SDValue(N, 0).use_empty());}]>;

def atomic_cmp_swap_global_ret : PatFrag<
        (ops node:$ptr, node:$cmp, node:$value),
        (atomic_cmp_swap node:$ptr, node:$cmp, node:$value),
        [{return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && (!SDValue(N, 0).use_empty());}]>;

//===----------------------------------------------------------------------===//
// Misc Pattern Fragments
//===----------------------------------------------------------------------===//

// Named integer constants; judging by the names these are the raw bit
// patterns of floating-point values (NOTE(review): confirm against users).
class Constants {
int TWO_PI = 0x40c90fdb;
int PI = 0x40490fdb;
int TWO_PI_INV = 0x3e22f983;
int FP_UINT_MAX_PLUS_1 = 0x4f800000;    // 1 << 32 in floating point encoding
int FP16_ONE = 0x3C00;
int FP32_ONE = 0x3f800000;
int FP32_NEG_ONE = 0xbf800000;
int FP64_ONE = 0x3ff0000000000000;
int FP64_NEG_ONE = 0xbff0000000000000;
}
def CONST : Constants;

// Floating-point immediate leaves for zero, exactly 1.0 and exactly 0.5.
def FP_ZERO : PatLeaf <
  (fpimm),
  [{return N->getValueAPF().isZero();}]
>;

def FP_ONE : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(1.0);}]
>;

def FP_HALF : PatLeaf <
  (fpimm),
  [{return N->isExactlyValue(0.5);}]
>;

let isCodeGenOnly = 1, isPseudo = 1 in {

let usesCustomInserter = 1  in {

// Pseudo selecting AMDGPUclamp of an f32 value with bounds 0.0 and 1.0.
class CLAMP <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "CLAMP $dst, $src0",
  [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))]
>;

// Pseudo selecting fabs on f32.
class FABS <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FABS $dst, $src0",
  [(set f32:$dst, (fabs f32:$src0))]
>;

// Pseudo selecting fneg on f32.
class FNEG <RegisterClass rc> : AMDGPUShaderInst <
  (outs rc:$dst),
  (ins rc:$src0),
  "FNEG $dst, $src0",
  [(set f32:$dst, (fneg f32:$src0))]
>;

} // usesCustomInserter = 1

// Defines the <prefix>RegisterLoad / <prefix>RegisterStore pseudos, which set
// the isRegisterLoad / isRegisterStore flag respectively.
multiclass RegisterLoadStore <RegisterClass dstClass, Operand addrClass,
                    ComplexPattern addrPat> {
let UseNamedOperandTable = 1 in {

  def RegisterLoad : AMDGPUShaderInst <
    (outs dstClass:$dst),
    (ins addrClass:$addr, i32imm:$chan),
    "RegisterLoad $dst, $addr",
    [(set i32:$dst, (AMDGPUregister_load addrPat:$addr, (i32 timm:$chan)))]
  > {
    let isRegisterLoad = 1;
  }

  def RegisterStore : AMDGPUShaderInst <
    (outs),
    (ins dstClass:$val, addrClass:$addr, i32imm:$chan),
    "RegisterStore $val, $addr",
    [(AMDGPUregister_store i32:$val, addrPat:$addr, (i32 timm:$chan))]
  > {
    let isRegisterStore = 1;
  }
}
}

} // End isCodeGenOnly = 1, isPseudo = 1

/* Generic helper patterns for intrinsics */
/* -------------------------------------- */

// fpow(src0, src1) is selected as exp_ieee(mul(src1, log_ieee(src0))).
class POW_Common <AMDGPUInst log_ieee, AMDGPUInst exp_ieee, AMDGPUInst mul>
  : Pat <
  (fpow f32:$src0, f32:$src1),
  (exp_ieee (mul f32:$src1, (log_ieee f32:$src0)))
>;

/* Other helper patterns */
/* --------------------- */

/* Extract element pattern */
class Extract_Element <ValueType sub_type, ValueType vec_type, int sub_idx,
                       SubRegIndex sub_reg>
  : Pat<
  (sub_type (extractelt vec_type:$src, sub_idx)),
  (EXTRACT_SUBREG $src, sub_reg)
>;

/* Insert element pattern */
class Insert_Element <ValueType elem_type, ValueType vec_type,
                      int sub_idx, SubRegIndex sub_reg>
  : Pat <
  (insertelt vec_type:$vec, elem_type:$elem, sub_idx),
  (INSERT_SUBREG $vec, $elem, sub_reg)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// bitconvert pattern
class BitConvert <ValueType dt, ValueType st, RegisterClass rc> : Pat <
  (dt (bitconvert (st rc:$src0))),
  (dt rc:$src0)
>;

// XXX: Convert to new syntax and use COPY_TO_REG, once the DFAPacketizer
// can handle COPY instructions.
// Selects AMDGPUdwordaddr of a register to that same register.
class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat<
  (vt (AMDGPUdwordaddr (vt rc:$addr))),
  (vt rc:$addr)
>;

// BFI_INT patterns

// Patterns that select the given BFI_INT instruction.  LoadImm32 materialises
// a 32-bit immediate and RC64 is the register class used to reassemble f64
// results.
multiclass BFIPatterns<Instruction BFI_INT,
                       Instruction LoadImm32,
                       RegisterClass RC64> {
  // Definition from ISA doc:
  // (y & x) | (z & ~x)
  def : Pat<
    (or (and i32:$y, i32:$x), (and i32:$z, (not i32:$x))),
    (BFI_INT $x, $y, $z)
  >;

  // SHA-256 Ch function
  // z ^ (x & (y ^ z))
  def : Pat<
    (xor i32:$z, (and i32:$x, (xor i32:$y, i32:$z))),
    (BFI_INT $x, $y, $z)
  >;

  // f32 copysign: the mask 0x7fffffff selects every bit but the top one from
  // $src0; the remaining bit comes from $src1.
  def : Pat<
    (fcopysign f32:$src0, f32:$src1),
    (BFI_INT (LoadImm32 (i32 0x7fffffff)), $src0, $src1)
  >;

  // f64 copysign with an f64 sign source: sub0 of $src0 is passed through and
  // only the sub1 halves are combined.
  def : Pat<
    (f64 (fcopysign f64:$src0, f64:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               (i32 (EXTRACT_SUBREG $src1, sub1))), sub1)
  >;

  // f64 copysign with an f32 sign source: $src1 is used directly as the third
  // BFI_INT operand.
  def : Pat<
    (f64 (fcopysign f64:$src0, f32:$src1)),
    (REG_SEQUENCE RC64,
      (i32 (EXTRACT_SUBREG $src0, sub0)), sub0,
      (BFI_INT (LoadImm32 (i32 0x7fffffff)),
               (i32 (EXTRACT_SUBREG $src0, sub1)),
               $src1), sub1)
  >;
}

// SHA-256 Ma patterns

// ((x & z) | (y & (x | z))) -> BFI_INT (XOR x, y), z, y
class SHA256MaPattern<Instruction BFI_INT, Instruction XOR> : Pat<
  (or (and i32:$x, i32:$z), (and i32:$y, (or i32:$x, i32:$z))),
  (BFI_INT (XOR i32:$x, i32:$y), i32:$z, i32:$y)
>;

// Bitfield extract patterns

// Immediate accepted by isMask_32.
def IMMZeroBasedBitfieldMask : PatLeaf<(imm), [{
  return isMask_32(N->getZExtValue());
}]>;

// Rewrites an immediate into an i32 target constant holding its population
// count.
def IMMPopCount : SDNodeXForm<imm, [{
  return CurDAG->getTargetConstant(countPopulation(N->getZExtValue()), SDLoc(N),
                                   MVT::i32);
}]>;

// (src >> rshift) & mask is selected as BFE with offset $rshift; the third
// operand is the population count of the mask, materialised through MOV.
class BFEPattern<Instruction BFE, Instruction MOV> : Pat<
  (i32 (and (i32 (srl i32:$src, i32:$rshift)), IMMZeroBasedBitfieldMask:$mask)),
  (BFE $src, $rshift, (MOV (i32 (IMMPopCount $mask))))
>;

// rotr pattern
class ROTRPattern<Instruction BIT_ALIGN> : Pat<
  (rotr i32:$src0, i32:$src1),
  (BIT_ALIGN $src0, $src0, $src1)
>;

// This matches 16 permutations of
// max(min(x, y), min(max(x, y), z))
class IntMed3Pat<Instruction med3Inst,
                 SDPatternOperator max,
                 SDPatternOperator max_oneuse,
                 SDPatternOperator min_oneuse> : Pat<
  (max (min_oneuse i32:$src0, i32:$src1),
       (min_oneuse (max_oneuse i32:$src0, i32:$src1), i32:$src2)),
  (med3Inst $src0, $src1, $src2)
>;

// Single-use variants of the integer min/max nodes, marked commutative and
// associative.
let Properties = [SDNPCommutative, SDNPAssociative] in {
def smax_oneuse : HasOneUseBinOp<smax>;
def smin_oneuse : HasOneUseBinOp<smin>;
def umax_oneuse : HasOneUseBinOp<umax>;
def umin_oneuse : HasOneUseBinOp<umin>;
} // Properties = [SDNPCommutative, SDNPAssociative]

def sub_oneuse : HasOneUseBinOp<sub>;

def select_oneuse : HasOneUseTernaryOp<select>;

// Special conversion patterns

// fp_to_sint(ffloor(src + 0.5)); matches only when NoNaNsFPMath is set.
def cvt_rpi_i32_f32 : PatFrag<
  (ops node:$src),
  (fp_to_sint (ffloor (fadd $src, FP_HALF))),
  [{ (void) N; return TM.Options.NoNaNsFPMath; }]
>;

// fp_to_sint(ffloor(src)); matches only when NoNaNsFPMath is set.
def cvt_flr_i32_f32 : PatFrag<
  (ops node:$src),
  (fp_to_sint (ffloor $src)),
  [{ (void)N; return TM.Options.NoNaNsFPMath; }]
>;

// AMDGPUmul_i24 followed by an add folds into a single three-operand Inst.
class IMad24Pat<Instruction Inst> : Pat<
  (add (AMDGPUmul_i24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

// AMDGPUmul_u24 followed by an add folds into a single three-operand Inst.
class UMad24Pat<Instruction Inst> : Pat<
  (add (AMDGPUmul_u24 i32:$src0, i32:$src1), i32:$src2),
  (Inst $src0, $src1, $src2)
>;

// 1.0 / src is selected as RcpInst.
class RcpPat<Instruction RcpInst, ValueType vt> : Pat<
  (fdiv FP_ONE, vt:$src),
  (RcpInst $src)
>;

// AMDGPUrcp(fsqrt(src)) is selected as RsqInst.
class RsqPat<Instruction RsqInst, ValueType vt> : Pat<
  (AMDGPUrcp (fsqrt vt:$src)),
  (RsqInst $src)
>;

include "R600Instructions.td"
include "R700Instructions.td"
include "EvergreenInstructions.td"
include "CaymanInstructions.td"

include "SIInstrInfo.td"