; RUN: llc < %s -march=amdgcn -verify-machineinstrs | FileCheck %s --check-prefixes=SI,GCN,MESA-GCN,FUNC
; RUN: llc < %s -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,MESA-VI,MESA-GCN,FUNC
; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefixes=VI,GCN,HSA-VI,FUNC
; RUN: llc < %s -march=r600 -mcpu=redwood -verify-machineinstrs | FileCheck %s --check-prefix=EG --check-prefix=FUNC
; RUN: llc < %s -march=r600 -mcpu=cayman -verify-machineinstrs | FileCheck %s --check-prefix=EG --check-prefix=FUNC

; Kernel-argument lowering test for the amdgcn and r600 targets.
;
; Every kernel below takes an output pointer followed by one argument of the
; type under test and stores that argument (after an extension where noted)
; through the pointer. The check lines pin the instructions expected to fetch
; the argument. Prefix usage, as wired up by the invocations above:
;   SI        checks for the default amdgcn run
;   MESA-VI   checks for the tonga run (flat-for-global disabled)
;   MESA-GCN  checks shared by the two runs above
;   HSA-VI    checks for the amdhsa/fiji run, including the reported
;             kernarg_segment_alignment value
;   EG        checks for the r600 runs (redwood and cayman)
;
; NOTE(review): the first-generation offsets are written as 0xb/0xd/0x11/0x19
; while the tonga ones are 0x2c/0x34/0x44/0x64, i.e. exactly four times larger;
; this looks like dword versus byte offsets - confirm against the ISA docs.

; i8 argument with no extension attribute; zero-extended in the body.
; FUNC-LABEL: {{^}}i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i8_arg(i32 addrspace(1)* nocapture %out, i8 %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked zeroext; no masking instruction is checked here.
; FUNC-LABEL: {{^}}i8_zext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ubyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i8_zext_arg(i32 addrspace(1)* nocapture %out, i8 zeroext %in) nounwind {
entry:
  %0 = zext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i8 argument marked signext; the HSA run is expected to use a signed byte load.
; FUNC-LABEL: {{^}}i8_sext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sbyte v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i8_sext_arg(i32 addrspace(1)* nocapture %out, i8 signext %in) nounwind {
entry:
  %0 = sext i8 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument with no extension attribute; zero-extended in the body, so the
; mask on the loaded dword is the full 16-bit 0xffff (not the i8 mask 0xff).
; FUNC-LABEL: {{^}}i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: AND_INT {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; MESA-VI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0x2c
; MESA-GCN: s_and_b32 s{{[0-9]+}}, [[VAL]], 0xffff
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i16_arg(i32 addrspace(1)* nocapture %out, i16 %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked zeroext.
; FUNC-LABEL: {{^}}i16_zext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_ushort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i16_zext_arg(i32 addrspace(1)* nocapture %out, i16 zeroext %in) nounwind {
entry:
  %0 = zext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; i16 argument marked signext; the HSA run is expected to use a signed short load.
; FUNC-LABEL: {{^}}i16_sext_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: MOV {{[ *]*}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_add_u32 [[SPTR_LO:s[0-9]+]], s4, 8
; HSA-VI: s_addc_u32 [[SPTR_HI:s[0-9]+]], s5, 0
; HSA-VI: v_mov_b32_e32 v[[VPTR_LO:[0-9]+]], [[SPTR_LO]]
; HSA-VI: v_mov_b32_e32 v[[VPTR_HI:[0-9]+]], [[SPTR_HI]]
; FIXME: Should be using s_load_dword
; HSA-VI: flat_load_sshort v{{[0-9]+}}, v{{\[}}[[VPTR_LO]]:[[VPTR_HI]]]

define amdgpu_kernel void @i16_sext_arg(i32 addrspace(1)* nocapture %out, i16 signext %in) nounwind {
entry:
  %0 = sext i16 %in to i32
  store i32 %0, i32 addrspace(1)* %out, align 4
  ret void
}

; 32-bit integer argument: a single dword scalar load on every amdgcn run.
; FUNC-LABEL: {{^}}i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]}}, s[4:5], 0x8
define amdgpu_kernel void @i32_arg(i32 addrspace(1)* nocapture %out, i32 %in) nounwind {
entry:
  store i32 %in, i32 addrspace(1)* %out, align 4
  ret void
}

; 32-bit float argument: same expectations as the i32 case.
; FUNC-LABEL: {{^}}f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: T{{[0-9]\.[XYZW]}}, KC0[2].Z
; SI: s_load_dword s{{[0-9]}}, s[0:1], 0xb
; MESA-VI: s_load_dword s{{[0-9]}}, s[0:1], 0x2c
; HSA-VI: s_load_dword s{{[0-9]+}}, s[4:5], 0x8
define amdgpu_kernel void @f32_arg(float addrspace(1)* nocapture %out, float %in) nounwind {
entry:
  store float %in, float addrspace(1)* %out, align 4
  ret void
}

; <2 x i8> argument: one byte load per element is expected.
; FUNC-LABEL: {{^}}v2i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v2i8_arg(<2 x i8> addrspace(1)* %out, <2 x i8> %in) {
entry:
  store <2 x i8> %in, <2 x i8> addrspace(1)* %out
  ret void
}

; <2 x i16> argument: one short load per element is expected.
; FUNC-LABEL: {{^}}v2i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v2i16_arg(<2 x i16> addrspace(1)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(1)* %out
  ret void
}

; <2 x i32> argument: a single two-dword scalar load.
; FUNC-LABEL: {{^}}v2i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @v2i32_arg(<2 x i32> addrspace(1)* nocapture %out, <2 x i32> %in) nounwind {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(1)* %out, align 4
  ret void
}

; <2 x float> argument: same expectations as the <2 x i32> case.
; FUNC-LABEL: {{^}}v2f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[2].W
; SI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xb
; MESA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x2c
; HSA-VI: s_load_dwordx2 s{{\[[0-9]:[0-9]\]}}, s[4:5], 0x8
define amdgpu_kernel void @v2f32_arg(<2 x float> addrspace(1)* nocapture %out, <2 x float> %in) nounwind {
entry:
  store <2 x float> %in, <2 x float> addrspace(1)* %out, align 4
  ret void
}

; <3 x i8> argument: three byte loads; the r600 runs also pin the byte offsets.
; FUNC-LABEL: {{^}}v3i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 40
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 41
; EG-DAG: VTX_READ_8 T{{[0-9]}}.X, T{{[0-9]}}.X, 42
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v3i8_arg(<3 x i8> addrspace(1)* nocapture %out, <3 x i8> %in) nounwind {
entry:
  store <3 x i8> %in, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; <3 x i16> argument: three short loads; the r600 runs also pin the byte offsets.
; FUNC-LABEL: {{^}}v3i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 44
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 46
; EG-DAG: VTX_READ_16 T{{[0-9]}}.X, T{{[0-9]}}.X, 48
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v3i16_arg(<3 x i16> addrspace(1)* nocapture %out, <3 x i16> %in) nounwind {
entry:
  store <3 x i16> %in, <3 x i16> addrspace(1)* %out, align 4
  ret void
}

; <3 x i32> argument: the amdgcn runs expect a four-dword scalar load.
; FUNC-LABEL: {{^}}v3i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v3i32_arg(<3 x i32> addrspace(1)* nocapture %out, <3 x i32> %in) nounwind {
entry:
  store <3 x i32> %in, <3 x i32> addrspace(1)* %out, align 4
  ret void
}

; <3 x float> argument: same expectations as the <3 x i32> case.
; FUNC-LABEL: {{^}}v3f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]+\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v3f32_arg(<3 x float> addrspace(1)* nocapture %out, <3 x float> %in) nounwind {
entry:
  store <3 x float> %in, <3 x float> addrspace(1)* %out, align 4
  ret void
}

; <4 x i8> argument: one byte load per element is expected.
; FUNC-LABEL: {{^}}v4i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v4i8_arg(<4 x i8> addrspace(1)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(1)* %out
  ret void
}

; <4 x i16> argument: one short load per element is expected. The amdhsa
; checks use the HSA-VI prefix so that they are actually enabled by that run.
; FUNC-LABEL: {{^}}v4i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v4i16_arg(<4 x i16> addrspace(1)* %out, <4 x i16> %in) {
entry:
  store <4 x i16> %in, <4 x i16> addrspace(1)* %out
  ret void
}

; <4 x i32> argument: a single four-dword scalar load.
; FUNC-LABEL: {{^}}v4i32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v4i32_arg(<4 x i32> addrspace(1)* nocapture %out, <4 x i32> %in) nounwind {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; <4 x float> argument: same expectations as the <4 x i32> case.
; FUNC-LABEL: {{^}}v4f32_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[3].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].X
; SI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0xd
; MESA-VI: s_load_dwordx4 s{{\[[0-9]:[0-9]\]}}, s[0:1], 0x34
; HSA-VI: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x10
define amdgpu_kernel void @v4f32_arg(<4 x float> addrspace(1)* nocapture %out, <4 x float> %in) nounwind {
entry:
  store <4 x float> %in, <4 x float> addrspace(1)* %out, align 4
  ret void
}

; <8 x i8> argument: one byte load per element (eight in total) is expected
; on every run. The amdhsa checks use the HSA-VI prefix and the flat_load_ubyte
; mnemonic, matching the other i8 vector kernels in this file.
; FUNC-LABEL: {{^}}v8i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v8i8_arg(<8 x i8> addrspace(1)* %out, <8 x i8> %in) {
entry:
  store <8 x i8> %in, <8 x i8> addrspace(1)* %out
  ret void
}

; <8 x i16> argument: one short load per element is expected.
; FUNC-LABEL: {{^}}v8i16_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v8i16_arg(<8 x i16> addrspace(1)* %out, <8 x i16> %in) {
entry:
  store <8 x i16> %in, <8 x i16> addrspace(1)* %out
  ret void
}

; <8 x i32> argument: a single eight-dword scalar load; the reported kernarg
; segment alignment rises to 5.
; FUNC-LABEL: {{^}}v8i32_arg:
; HSA-VI: kernarg_segment_alignment = 5
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define amdgpu_kernel void @v8i32_arg(<8 x i32> addrspace(1)* nocapture %out, <8 x i32> %in) nounwind {
entry:
  store <8 x i32> %in, <8 x i32> addrspace(1)* %out, align 4
  ret void
}

; <8 x float> argument: same expectations as the <8 x i32> case, including the
; tonga and amdhsa load checks.
; FUNC-LABEL: {{^}}v8f32_arg:
; HSA-VI: kernarg_segment_alignment = 5
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[4].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[5].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].X
; SI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x11
; MESA-VI: s_load_dwordx8 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x44
; HSA-VI: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x20
define amdgpu_kernel void @v8f32_arg(<8 x float> addrspace(1)* nocapture %out, <8 x float> %in) nounwind {
entry:
  store <8 x float> %in, <8 x float> addrspace(1)* %out, align 4
  ret void
}

; <16 x i8> argument: one byte load per element (sixteen in total) is expected.
; FUNC-LABEL: {{^}}v16i8_arg:
; HSA-VI: kernarg_segment_alignment = 4
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; EG: VTX_READ_8
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; MESA-GCN: buffer_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
; HSA-VI: flat_load_ubyte
define amdgpu_kernel void @v16i8_arg(<16 x i8> addrspace(1)* %out, <16 x i8> %in) {
entry:
  store <16 x i8> %in, <16 x i8> addrspace(1)* %out
  ret void
}

; <16 x i16> argument: one short load per element (sixteen in total) is expected.
; FUNC-LABEL: {{^}}v16i16_arg:
; HSA-VI: kernarg_segment_alignment = 5
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; EG: VTX_READ_16
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; MESA-GCN: buffer_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
; HSA-VI: flat_load_ushort
define amdgpu_kernel void @v16i16_arg(<16 x i16> addrspace(1)* %out, <16 x i16> %in) {
entry:
  store <16 x i16> %in, <16 x i16> addrspace(1)* %out
  ret void
}

; <16 x i32> argument: a single sixteen-dword scalar load; the reported kernarg
; segment alignment rises to 6.
; FUNC-LABEL: {{^}}v16i32_arg:
; HSA-VI: kernarg_segment_alignment = 6
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define amdgpu_kernel void @v16i32_arg(<16 x i32> addrspace(1)* nocapture %out, <16 x i32> %in) nounwind {
entry:
  store <16 x i32> %in, <16 x i32> addrspace(1)* %out, align 4
  ret void
}

; <16 x float> argument: same expectations as the <16 x i32> case.
; FUNC-LABEL: {{^}}v16f32_arg:
; HSA-VI: kernarg_segment_alignment = 6
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[6].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[7].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[8].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].X
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Y
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].Z
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[9].W
; EG-DAG: T{{[0-9]\.[XYZW]}}, KC0[10].X
; SI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x19
; MESA-VI: s_load_dwordx16 s{{\[[0-9]+:[0-9]+\]}}, s[0:1], 0x64
; HSA-VI: s_load_dwordx16 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x40
define amdgpu_kernel void @v16f32_arg(<16 x float> addrspace(1)* nocapture %out, <16 x float> %in) nounwind {
entry:
  store <16 x float> %in, <16 x float> addrspace(1)* %out, align 4
  ret void
}

; i64 argument: two-dword scalar loads followed by a two-dword store.
; FUNC-LABEL: {{^}}kernel_arg_i64:
; MESA-GCN: s_load_dwordx2
; MESA-GCN: s_load_dwordx2
; MESA-GCN: buffer_store_dwordx2
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @kernel_arg_i64(i64 addrspace(1)* %out, i64 %a) nounwind {
  store i64 %a, i64 addrspace(1)* %out, align 8
  ret void
}

; double argument: the pointer and the value are each fetched with a two-dword
; scalar load (in either order, hence the -DAG checks).
; FUNC-LABEL: {{^}}f64_kernel_arg:
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x9
; SI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0xb
; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x24
; MESA-VI-DAG: s_load_dwordx2 s[{{[0-9]:[0-9]}}], s[0:1], 0x2c
; MESA-GCN: buffer_store_dwordx2
; HSA-VI: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[4:5], 0x8
define amdgpu_kernel void @f64_kernel_arg(double addrspace(1)* %out, double %in) {
entry:
  store double %in, double addrspace(1)* %out
  ret void
}

; Disabled case: the X-prefixed directives below match no enabled check prefix
; and the kernel itself is commented out.
; XFUNC-LABEL: {{^}}kernel_arg_v1i64:
; XGCN: s_load_dwordx2
; XGCN: s_load_dwordx2
; XGCN: buffer_store_dwordx2
; define amdgpu_kernel void @kernel_arg_v1i64(<1 x i64> addrspace(1)* %out, <1 x i64> %a) nounwind {
;   store <1 x i64> %a, <1 x i64> addrspace(1)* %out, align 8
;   ret void
; }

; i1 argument stored as-is; only the default amdgcn run is checked for the
; i1 kernels.
; FUNC-LABEL: {{^}}i1_arg:
; SI: buffer_load_ubyte
; SI: v_and_b32_e32
; SI: buffer_store_byte
; SI: s_endpgm
define amdgpu_kernel void @i1_arg(i1 addrspace(1)* %out, i1 %x) nounwind {
  store i1 %x, i1 addrspace(1)* %out, align 1
  ret void
}

; i1 argument zero-extended to i32 before the store.
; FUNC-LABEL: {{^}}i1_arg_zext_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_zext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %ext = zext i1 %x to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i1 argument zero-extended to i64 before the store.
; FUNC-LABEL: {{^}}i1_arg_zext_i64:
; SI: buffer_load_ubyte
; SI: buffer_store_dwordx2
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_zext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
  %ext = zext i1 %x to i64
  store i64 %ext, i64 addrspace(1)* %out, align 8
  ret void
}

; i1 argument sign-extended to i32 before the store.
; FUNC-LABEL: {{^}}i1_arg_sext_i32:
; SI: buffer_load_ubyte
; SI: buffer_store_dword
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_sext_i32(i32 addrspace(1)* %out, i1 %x) nounwind {
  %ext = sext i1 %x to i32
  store i32 %ext, i32 addrspace(1)* %out, align 4
  ret void
}

; i1 argument sign-extended to i64 before the store; a bitfield extract and an
; arithmetic shift are expected between the load and the store.
; FUNC-LABEL: {{^}}i1_arg_sext_i64:
; SI: buffer_load_ubyte
; SI: v_bfe_i32
; SI: v_ashrrev_i32
; SI: buffer_store_dwordx2
; SI: s_endpgm
define amdgpu_kernel void @i1_arg_sext_i64(i64 addrspace(1)* %out, i1 %x) nounwind {
  %ext = sext i1 %x to i64
  store i64 %ext, i64 addrspace(1)* %out, align 8
  ret void
}