; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GCN,MESA-GCN,FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,MESA-VI,MESA-GCN,FUNC
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,HSA-VI,FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC

; Every function below takes an output pointer plus one argument of a given
; type and stores that argument (zero/sign-extended where the test name says
; so) through the pointer. The check lines pin the instructions, and where
; given the offsets, that are used to fetch the argument.
;
; Check prefixes, as enabled by the llc invocations above:
;   FUNC      all five invocations
;   EG        the two r600 invocations (redwood, cayman)
;   SI        amdgcn with the default CPU
;   MESA-VI   amdgcn, tonga, no amdhsa triple
;   MESA-GCN  both non-amdhsa amdgcn invocations
;   HSA-VI    amdgcn--amdhsa, fiji
; Any other prefix spelling is silently ignored by FileCheck, so a check
; written with an unlisted prefix never runs.

; FUNC-LABEL: {{^}}i8_arg:
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i8_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i8_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sbyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
  %0 = sext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i16_arg:
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; The zext below is from i16, so the mask must keep the low 16 bits. A bare
; 0xff pattern would also match 0xffff as a substring and hide a wrong mask.
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i16_zext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i16_sext_arg:
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sshort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
  %0 = sext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]}}, s[4:5], 0x8
define void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}f32_arg:
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
define void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v2i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v2i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v2i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v2f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[4:5], 0x8
define void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v3i8_arg:
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v3i16_arg:
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v3i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v3f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v4i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v4i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v4i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v4f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v8i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v8i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v8i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; The MESA-VI and HSA-VI expectations mirror v8i32_arg, which has the same
; argument size and position.
; FUNC-LABEL: {{^}}v8f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v16i8_arg:
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v16i16_arg:
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v16i32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}v16f32_arg:
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}kernel_arg_i64:
; MESA-GCN: s_load_dwordx2
; MESA-GCN: s_load_dwordx2
; MESA-GCN: buffer_store_dwordx2
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}f64_kernel_arg:
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
; MESA-GCN: buffer_store_dwordx2
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
  store double %in, double addrspace(1)* %out
  ret void
}

; The <1 x i64> case below is disabled: the function is commented out and its
; directives carry an X in front of the prefix so FileCheck ignores them.
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XGCN: s_load_dwordx2
; XGCN: s_load_dwordx2
; XGCN: buffer_store_dwordx2
; define void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

; FUNC-LABEL: {{^}}i1_arg:
; SI: buffer_load_ubyte
; SI: v_and_b32_e32
; SI: buffer_store_byte
; SI: s_endpgm
define void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
  store i1 %x, i1 addrspace(1)* %out, align 1
  ret void
}

; FUNC-LABEL: {{^}}i1_arg_zext_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %ext = zext i1 %x to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i1_arg_zext_i64:
; SI: buffer_load_ubyte
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
  %ext = zext i1 %x to i64
  store i64 %ext, i64 addrspace(1)* %out, align 8
  ret void
}

; FUNC-LABEL: {{^}}i1_arg_sext_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %ext = sext i1 %x to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; FUNC-LABEL: {{^}}i1_arg_sext_i64:
; SI: buffer_load_ubyte
; SI: v_bfe_i32
; SI: v_ashrrev_i32
; SI: buffer_store_dwordx2
; SI: s_endpgm
define void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
  %ext = sext i1 %x to i64
  store i64 %ext, i64 addrspace(1)* %out, align 8
  ret void
}