1; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,MESA %s 2; RUN: llc -march=amdgcn -mcpu=hawaii -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI,MESA %s 3; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,VI,MESA %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -mattr=-flat-for-global -amdgpu-scalarize-global-loads=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,HSA %s 5 6declare hidden void @external_void_func_i1(i1) #0 7declare hidden void @external_void_func_i1_signext(i1 signext) #0 8declare hidden void @external_void_func_i1_zeroext(i1 zeroext) #0 9 10declare hidden void @external_void_func_i8(i8) #0 11declare hidden void @external_void_func_i8_signext(i8 signext) #0 12declare hidden void @external_void_func_i8_zeroext(i8 zeroext) #0 13 14declare hidden void @external_void_func_i16(i16) #0 15declare hidden void @external_void_func_i16_signext(i16 signext) #0 16declare hidden void @external_void_func_i16_zeroext(i16 zeroext) #0 17 18declare hidden void @external_void_func_i32(i32) #0 19declare hidden void @external_void_func_i64(i64) #0 20declare hidden void @external_void_func_v2i64(<2 x i64>) #0 21declare hidden void @external_void_func_v3i64(<3 x i64>) #0 22declare hidden void @external_void_func_v4i64(<4 x i64>) #0 23 24declare hidden void @external_void_func_f16(half) #0 25declare hidden void @external_void_func_f32(float) #0 26declare hidden void @external_void_func_f64(double) #0 27declare hidden void @external_void_func_v2f32(<2 x float>) #0 28declare hidden void @external_void_func_v2f64(<2 x double>) #0 29declare hidden void @external_void_func_v3f32(<3 x float>) #0 30declare hidden void 
@external_void_func_v3f64(<3 x double>) #0 31declare hidden void @external_void_func_v5f32(<5 x float>) #0 32 33declare hidden void @external_void_func_v2i16(<2 x i16>) #0 34declare hidden void @external_void_func_v2f16(<2 x half>) #0 35declare hidden void @external_void_func_v3i16(<3 x i16>) #0 36declare hidden void @external_void_func_v3f16(<3 x half>) #0 37declare hidden void @external_void_func_v4i16(<4 x i16>) #0 38declare hidden void @external_void_func_v4f16(<4 x half>) #0 39 40declare hidden void @external_void_func_v2i32(<2 x i32>) #0 41declare hidden void @external_void_func_v3i32(<3 x i32>) #0 42declare hidden void @external_void_func_v3i32_i32(<3 x i32>, i32) #0 43declare hidden void @external_void_func_v4i32(<4 x i32>) #0 44declare hidden void @external_void_func_v5i32(<5 x i32>) #0 45declare hidden void @external_void_func_v8i32(<8 x i32>) #0 46declare hidden void @external_void_func_v16i32(<16 x i32>) #0 47declare hidden void @external_void_func_v32i32(<32 x i32>) #0 48declare hidden void @external_void_func_v32i32_i32(<32 x i32>, i32) #0 49 50; return value and argument 51declare hidden i32 @external_i32_func_i32(i32) #0 52 53; Structs 54declare hidden void @external_void_func_struct_i8_i32({ i8, i32 }) #0 55declare hidden void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 56declare hidden void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }), { i8, i32 } addrspace(5)* byval({ i8, i32 })) #0 57 58declare hidden void @external_void_func_v16i8(<16 x i8>) #0 59 60 61; FIXME: Should be passing -1 62; GCN-LABEL: {{^}}test_call_external_void_func_i1_imm: 63; MESA: s_mov_b32 s36, SCRATCH_RSRC_DWORD 64 65; MESA-DAG: s_mov_b64 s[0:1], s[36:37] 66 67; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 68; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4 69; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], 
external_void_func_i1@rel32@hi+12 70; GCN-DAG: v_mov_b32_e32 v0, 1{{$}} 71; MESA-DAG: s_mov_b64 s[2:3], s[38:39] 72 73; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 74; GCN-NEXT: s_endpgm 75define amdgpu_kernel void @test_call_external_void_func_i1_imm() #0 { 76 call void @external_void_func_i1(i1 true) 77 ret void 78} 79 80; GCN-LABEL: {{^}}test_call_external_void_func_i1_signext: 81 82; HSA: buffer_load_ubyte [[VAR:v[0-9]+]] 83; HSA: s_mov_b32 s32, 0 84; MESA-DAG: buffer_load_ubyte [[VAR:v[0-9]+]] 85; MESA-DAG: s_mov_b32 s32, 0{{$}} 86 87; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 88; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_signext@rel32@lo+4 89; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_signext@rel32@hi+12 90; GCN-NEXT: v_bfe_i32 v0, [[VAR]], 0, 1 91; GCN-NEXT: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 92; GCN-NEXT: s_endpgm 93define amdgpu_kernel void @test_call_external_void_func_i1_signext(i32) #0 { 94 %var = load volatile i1, i1 addrspace(1)* undef 95 call void @external_void_func_i1_signext(i1 signext %var) 96 ret void 97} 98 99; FIXME: load should be scheduled before getpc 100; GCN-LABEL: {{^}}test_call_external_void_func_i1_zeroext: 101 102; HSA: buffer_load_ubyte [[VAL:v[0-9]+]] 103; HSA-DAG: s_mov_b32 s32, 0{{$}} 104 105; MESA: buffer_load_ubyte [[VAL:v[0-9]+]] 106; MESA-DAG: s_mov_b32 s32, 0{{$}} 107 108; GCN: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 109; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1_zeroext@rel32@lo+4 110; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1_zeroext@rel32@hi+12 111; GCN-NEXT: v_and_b32_e32 v0, 1, [[VAL]] 112; GCN-NEXT: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 113; GCN-NEXT: s_endpgm 114define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 { 115 %var = load volatile i1, i1 addrspace(1)* undef 116 call void @external_void_func_i1_zeroext(i1 zeroext %var) 117 ret void 118} 119 120; 
GCN-LABEL: {{^}}test_call_external_void_func_i8_imm: 121 122; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 123; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4 124; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+12 125; GCN-DAG: v_mov_b32_e32 v0, 0x7b 126 127; GCN-DAG: s_mov_b32 s32, 0{{$}} 128 129; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 130; GCN-NEXT: s_endpgm 131define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 { 132 call void @external_void_func_i8(i8 123) 133 ret void 134} 135 136; FIXME: don't wait before call 137; GCN-LABEL: {{^}}test_call_external_void_func_i8_signext: 138 139; GCN-DAG: buffer_load_sbyte [[VAL:v[0-9]+]] 140; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 141; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4 142; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+12 143 144; GCN-DAG: s_mov_b32 s32, 0 145 146; GCN-NOT: s_waitcnt 147; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 148; GCN-NEXT: s_endpgm 149define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 { 150 %var = load volatile i8, i8 addrspace(1)* undef 151 call void @external_void_func_i8_signext(i8 signext %var) 152 ret void 153} 154 155; GCN-LABEL: {{^}}test_call_external_void_func_i8_zeroext: 156 157; GCN-DAG: buffer_load_ubyte [[VAL:v[0-9]+]] 158; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 159; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4 160; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+12 161 162; GCN-DAG: s_mov_b32 s32, 0 163 164; GCN-NOT: s_waitcnt 165; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 166; GCN-NEXT: s_endpgm 167define amdgpu_kernel void @test_call_external_void_func_i8_zeroext(i32) #0 { 168 %var = load volatile i8, i8 addrspace(1)* undef 169 call void 
@external_void_func_i8_zeroext(i8 zeroext %var) 170 ret void 171} 172 173; GCN-LABEL: {{^}}test_call_external_void_func_i16_imm: 174; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}} 175 176; GCN-DAG: s_mov_b32 s32, 0 177 178; GCN: s_swappc_b64 179define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 { 180 call void @external_void_func_i16(i16 123) 181 ret void 182} 183 184; GCN-LABEL: {{^}}test_call_external_void_func_i16_signext: 185 186; GCN-DAG: buffer_load_sshort [[VAL:v[0-9]+]] 187; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 188; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4 189; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+12 190 191; GCN-DAG: s_mov_b32 s32, 0 192 193; GCN-NOT: s_waitcnt 194; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 195; GCN-NEXT: s_endpgm 196define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 { 197 %var = load volatile i16, i16 addrspace(1)* undef 198 call void @external_void_func_i16_signext(i16 signext %var) 199 ret void 200} 201 202; GCN-LABEL: {{^}}test_call_external_void_func_i16_zeroext: 203 204; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 205; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4 206; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+12 207 208; GCN-DAG: s_mov_b32 s32, 0 209 210; GCN-NOT: s_waitcnt 211; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 212; GCN-NEXT: s_endpgm 213define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 { 214 %var = load volatile i16, i16 addrspace(1)* undef 215 call void @external_void_func_i16_zeroext(i16 zeroext %var) 216 ret void 217} 218 219; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm: 220 221; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 222; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4 223; GCN-DAG: 
s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+12 224; GCN-DAG: v_mov_b32_e32 v0, 42 225; GCN-DAG: s_mov_b32 s32, 0 226 227; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 228; GCN-NEXT: s_endpgm 229define amdgpu_kernel void @test_call_external_void_func_i32_imm(i32) #0 { 230 call void @external_void_func_i32(i32 42) 231 ret void 232} 233 234; GCN-LABEL: {{^}}test_call_external_void_func_i64_imm: 235; GCN-DAG: v_mov_b32_e32 v0, 0x7b{{$}} 236; GCN-DAG: v_mov_b32_e32 v1, 0{{$}} 237; GCN-DAG: s_getpc_b64 s[[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]] 238; GCN-DAG: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i64@rel32@lo+4 239; GCN-DAG: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i64@rel32@hi+12 240; GCN: s_swappc_b64 s[30:31], s[[[PC_LO]]:[[PC_HI]]] 241; GCN-NEXT: s_endpgm 242define amdgpu_kernel void @test_call_external_void_func_i64_imm() #0 { 243 call void @external_void_func_i64(i64 123) 244 ret void 245} 246 247; GCN-LABEL: {{^}}test_call_external_void_func_v2i64: 248; GCN: buffer_load_dwordx4 v[0:3] 249; GCN-NOT: s_waitcnt 250; GCN: s_swappc_b64 251define amdgpu_kernel void @test_call_external_void_func_v2i64() #0 { 252 %val = load <2 x i64>, <2 x i64> addrspace(1)* null 253 call void @external_void_func_v2i64(<2 x i64> %val) 254 ret void 255} 256 257; GCN-LABEL: {{^}}test_call_external_void_func_v2i64_imm: 258; GCN-DAG: v_mov_b32_e32 v0, 1 259; GCN-DAG: v_mov_b32_e32 v1, 2 260; GCN-DAG: v_mov_b32_e32 v2, 3 261; GCN-DAG: v_mov_b32_e32 v3, 4 262; GCN: s_swappc_b64 263define amdgpu_kernel void @test_call_external_void_func_v2i64_imm() #0 { 264 call void @external_void_func_v2i64(<2 x i64> <i64 8589934593, i64 17179869187>) 265 ret void 266} 267 268; GCN-LABEL: {{^}}test_call_external_void_func_v3i64: 269; GCN: buffer_load_dwordx4 v[0:3] 270; GCN: v_mov_b32_e32 v4, 1 271; GCN: v_mov_b32_e32 v5, 2 272; GCN-NOT: s_waitcnt 273; GCN: s_swappc_b64 274define amdgpu_kernel void @test_call_external_void_func_v3i64() #0 { 275 %load = load <2 
x i64>, <2 x i64> addrspace(1)* null 276 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 undef>, <3 x i32> <i32 0, i32 1, i32 2> 277 278 call void @external_void_func_v3i64(<3 x i64> %val) 279 ret void 280} 281 282; GCN-LABEL: {{^}}test_call_external_void_func_v4i64: 283; GCN: buffer_load_dwordx4 v[0:3] 284; GCN-DAG: v_mov_b32_e32 v4, 1 285; GCN-DAG: v_mov_b32_e32 v5, 2 286; GCN-DAG: v_mov_b32_e32 v6, 3 287; GCN-DAG: v_mov_b32_e32 v7, 4 288 289; GCN-NOT: s_waitcnt 290; GCN: s_swappc_b64 291define amdgpu_kernel void @test_call_external_void_func_v4i64() #0 { 292 %load = load <2 x i64>, <2 x i64> addrspace(1)* null 293 %val = shufflevector <2 x i64> %load, <2 x i64> <i64 8589934593, i64 17179869187>, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 294 call void @external_void_func_v4i64(<4 x i64> %val) 295 ret void 296} 297 298; GCN-LABEL: {{^}}test_call_external_void_func_f16_imm: 299; VI: v_mov_b32_e32 v0, 0x4400 300; CI: v_mov_b32_e32 v0, 4.0 301; GCN-NOT: v0 302; GCN: s_swappc_b64 303define amdgpu_kernel void @test_call_external_void_func_f16_imm() #0 { 304 call void @external_void_func_f16(half 4.0) 305 ret void 306} 307 308; GCN-LABEL: {{^}}test_call_external_void_func_f32_imm: 309; GCN: v_mov_b32_e32 v0, 4.0 310; GCN-NOT: v0 311; GCN: s_swappc_b64 312define amdgpu_kernel void @test_call_external_void_func_f32_imm() #0 { 313 call void @external_void_func_f32(float 4.0) 314 ret void 315} 316 317; GCN-LABEL: {{^}}test_call_external_void_func_v2f32_imm: 318; GCN-DAG: v_mov_b32_e32 v0, 1.0 319; GCN-DAG: v_mov_b32_e32 v1, 2.0 320; GCN: s_swappc_b64 321define amdgpu_kernel void @test_call_external_void_func_v2f32_imm() #0 { 322 call void @external_void_func_v2f32(<2 x float> <float 1.0, float 2.0>) 323 ret void 324} 325 326; GCN-LABEL: {{^}}test_call_external_void_func_v3f32_imm: 327; GCN-DAG: v_mov_b32_e32 v0, 1.0 328; GCN-DAG: v_mov_b32_e32 v1, 2.0 329; GCN-DAG: v_mov_b32_e32 v2, 4.0 330; GCN-NOT: v3, 331; GCN: s_swappc_b64 332define amdgpu_kernel void 
@test_call_external_void_func_v3f32_imm() #0 { 333 call void @external_void_func_v3f32(<3 x float> <float 1.0, float 2.0, float 4.0>) 334 ret void 335} 336 337; GCN-LABEL: {{^}}test_call_external_void_func_v5f32_imm: 338; GCN-DAG: v_mov_b32_e32 v0, 1.0 339; GCN-DAG: v_mov_b32_e32 v1, 2.0 340; GCN-DAG: v_mov_b32_e32 v2, 4.0 341; GCN-DAG: v_mov_b32_e32 v3, -1.0 342; GCN-DAG: v_mov_b32_e32 v4, 0.5 343; GCN-NOT: v5, 344; GCN: s_swappc_b64 345define amdgpu_kernel void @test_call_external_void_func_v5f32_imm() #0 { 346 call void @external_void_func_v5f32(<5 x float> <float 1.0, float 2.0, float 4.0, float -1.0, float 0.5>) 347 ret void 348} 349 350; GCN-LABEL: {{^}}test_call_external_void_func_f64_imm: 351; GCN: v_mov_b32_e32 v0, 0{{$}} 352; GCN: v_mov_b32_e32 v1, 0x40100000 353; GCN: s_swappc_b64 354define amdgpu_kernel void @test_call_external_void_func_f64_imm() #0 { 355 call void @external_void_func_f64(double 4.0) 356 ret void 357} 358 359; GCN-LABEL: {{^}}test_call_external_void_func_v2f64_imm: 360; GCN: v_mov_b32_e32 v0, 0{{$}} 361; GCN: v_mov_b32_e32 v1, 2.0 362; GCN: v_mov_b32_e32 v2, 0{{$}} 363; GCN: v_mov_b32_e32 v3, 0x40100000 364; GCN: s_swappc_b64 365define amdgpu_kernel void @test_call_external_void_func_v2f64_imm() #0 { 366 call void @external_void_func_v2f64(<2 x double> <double 2.0, double 4.0>) 367 ret void 368} 369 370; GCN-LABEL: {{^}}test_call_external_void_func_v3f64_imm: 371; GCN-DAG: v_mov_b32_e32 v0, 0{{$}} 372; GCN-DAG: v_mov_b32_e32 v1, 2.0 373; GCN-DAG: v_mov_b32_e32 v2, 0{{$}} 374; GCN-DAG: v_mov_b32_e32 v3, 0x40100000 375; GCN-DAG: v_mov_b32_e32 v4, 0{{$}} 376; GCN-DAG: v_mov_b32_e32 v5, 0x40200000 377; GCN-DAG: s_swappc_b64 378define amdgpu_kernel void @test_call_external_void_func_v3f64_imm() #0 { 379 call void @external_void_func_v3f64(<3 x double> <double 2.0, double 4.0, double 8.0>) 380 ret void 381} 382 383; GCN-LABEL: {{^}}test_call_external_void_func_v2i16: 384; GFX9: buffer_load_dword v0 385; GFX9-NOT: v0 386; GFX9: s_swappc_b64 
387define amdgpu_kernel void @test_call_external_void_func_v2i16() #0 { 388 %val = load <2 x i16>, <2 x i16> addrspace(1)* undef 389 call void @external_void_func_v2i16(<2 x i16> %val) 390 ret void 391} 392 393; GCN-LABEL: {{^}}test_call_external_void_func_v3i16: 394; GFX9: buffer_load_dwordx2 v[0:1] 395; GFX9-NOT: v0 396; GFX9-NOT: v1 397; GFX9: s_swappc_b64 398define amdgpu_kernel void @test_call_external_void_func_v3i16() #0 { 399 %val = load <3 x i16>, <3 x i16> addrspace(1)* undef 400 call void @external_void_func_v3i16(<3 x i16> %val) 401 ret void 402} 403 404; GCN-LABEL: {{^}}test_call_external_void_func_v3f16: 405; GFX9: buffer_load_dwordx2 v[0:1] 406; GFX9-NOT: v0 407; GFX9-NOT: v1 408; GFX9: s_swappc_b64 409define amdgpu_kernel void @test_call_external_void_func_v3f16() #0 { 410 %val = load <3 x half>, <3 x half> addrspace(1)* undef 411 call void @external_void_func_v3f16(<3 x half> %val) 412 ret void 413} 414 415; GCN-LABEL: {{^}}test_call_external_void_func_v3i16_imm: 416; GFX9: v_mov_b32_e32 v0, 0x20001 417; GFX9: v_mov_b32_e32 v1, 3 418; GFX9: s_swappc_b64 419define amdgpu_kernel void @test_call_external_void_func_v3i16_imm() #0 { 420 call void @external_void_func_v3i16(<3 x i16> <i16 1, i16 2, i16 3>) 421 ret void 422} 423 424; GCN-LABEL: {{^}}test_call_external_void_func_v3f16_imm: 425; GFX9: v_mov_b32_e32 v0, 0x40003c00 426; GFX9: v_mov_b32_e32 v1, 0x4400 427; GFX9: s_swappc_b64 428define amdgpu_kernel void @test_call_external_void_func_v3f16_imm() #0 { 429 call void @external_void_func_v3f16(<3 x half> <half 1.0, half 2.0, half 4.0>) 430 ret void 431} 432 433; GCN-LABEL: {{^}}test_call_external_void_func_v4i16: 434; GFX9: buffer_load_dwordx2 v[0:1] 435; GFX9-NOT: v0 436; GFX9-NOT: v1 437; GFX9: s_swappc_b64 438define amdgpu_kernel void @test_call_external_void_func_v4i16() #0 { 439 %val = load <4 x i16>, <4 x i16> addrspace(1)* undef 440 call void @external_void_func_v4i16(<4 x i16> %val) 441 ret void 442} 443 444; GCN-LABEL: 
{{^}}test_call_external_void_func_v4i16_imm: 445; GFX9-DAG: v_mov_b32_e32 v0, 0x20001 446; GFX9-DAG: v_mov_b32_e32 v1, 0x40003 447; GFX9: s_swappc_b64 448define amdgpu_kernel void @test_call_external_void_func_v4i16_imm() #0 { 449 call void @external_void_func_v4i16(<4 x i16> <i16 1, i16 2, i16 3, i16 4>) 450 ret void 451} 452 453; GCN-LABEL: {{^}}test_call_external_void_func_v2f16: 454; GFX9: buffer_load_dword v0 455; GFX9-NOT: v0 456; GFX9: s_swappc_b64 457define amdgpu_kernel void @test_call_external_void_func_v2f16() #0 { 458 %val = load <2 x half>, <2 x half> addrspace(1)* undef 459 call void @external_void_func_v2f16(<2 x half> %val) 460 ret void 461} 462 463; GCN-LABEL: {{^}}test_call_external_void_func_v2i32: 464; GCN: buffer_load_dwordx2 v[0:1] 465; GCN-NOT: s_waitcnt 466; GCN: s_swappc_b64 467define amdgpu_kernel void @test_call_external_void_func_v2i32() #0 { 468 %val = load <2 x i32>, <2 x i32> addrspace(1)* undef 469 call void @external_void_func_v2i32(<2 x i32> %val) 470 ret void 471} 472 473; GCN-LABEL: {{^}}test_call_external_void_func_v2i32_imm: 474; GCN-DAG: v_mov_b32_e32 v0, 1 475; GCN-DAG: v_mov_b32_e32 v1, 2 476; GCN: s_swappc_b64 477define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 { 478 call void @external_void_func_v2i32(<2 x i32> <i32 1, i32 2>) 479 ret void 480} 481 482; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_imm: {{.*}} 483 484; GCN-NOT: v3{{$}} 485; GCN-DAG: v_mov_b32_e32 v0, 3 486; GCN-DAG: v_mov_b32_e32 v1, 4 487; GCN-DAG: v_mov_b32_e32 v2, 5 488 489; GCN: s_swappc_b64 490define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 { 491 call void @external_void_func_v3i32(<3 x i32> <i32 3, i32 4, i32 5>) 492 ret void 493} 494 495; GCN-LABEL: {{^}}test_call_external_void_func_v3i32_i32: 496; GCN-DAG: v_mov_b32_e32 v0, 3 497; GCN-DAG: v_mov_b32_e32 v1, 4 498; GCN-DAG: v_mov_b32_e32 v2, 5 499; GCN-DAG: v_mov_b32_e32 v3, 6 500define amdgpu_kernel void 
@test_call_external_void_func_v3i32_i32(i32) #0 { 501 call void @external_void_func_v3i32_i32(<3 x i32> <i32 3, i32 4, i32 5>, i32 6) 502 ret void 503} 504 505; GCN-LABEL: {{^}}test_call_external_void_func_v4i32: 506; GCN: buffer_load_dwordx4 v[0:3] 507; GCN-NOT: s_waitcnt 508; GCN: s_swappc_b64 509define amdgpu_kernel void @test_call_external_void_func_v4i32() #0 { 510 %val = load <4 x i32>, <4 x i32> addrspace(1)* undef 511 call void @external_void_func_v4i32(<4 x i32> %val) 512 ret void 513} 514 515; GCN-LABEL: {{^}}test_call_external_void_func_v4i32_imm: 516; GCN-DAG: v_mov_b32_e32 v0, 1 517; GCN-DAG: v_mov_b32_e32 v1, 2 518; GCN-DAG: v_mov_b32_e32 v2, 3 519; GCN-DAG: v_mov_b32_e32 v3, 4 520; GCN: s_swappc_b64 521define amdgpu_kernel void @test_call_external_void_func_v4i32_imm() #0 { 522 call void @external_void_func_v4i32(<4 x i32> <i32 1, i32 2, i32 3, i32 4>) 523 ret void 524} 525 526; GCN-LABEL: {{^}}test_call_external_void_func_v5i32_imm: 527; GCN-DAG: v_mov_b32_e32 v0, 1 528; GCN-DAG: v_mov_b32_e32 v1, 2 529; GCN-DAG: v_mov_b32_e32 v2, 3 530; GCN-DAG: v_mov_b32_e32 v3, 4 531; GCN-DAG: v_mov_b32_e32 v4, 5 532; GCN-NOT: v5, 533; GCN: s_swappc_b64 534define amdgpu_kernel void @test_call_external_void_func_v5i32_imm() #0 { 535 call void @external_void_func_v5i32(<5 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5>) 536 ret void 537} 538 539; GCN-LABEL: {{^}}test_call_external_void_func_v8i32: 540; GCN-DAG: buffer_load_dwordx4 v[0:3], off 541; GCN-DAG: buffer_load_dwordx4 v[4:7], off 542; GCN-NOT: s_waitcnt 543; GCN: s_swappc_b64 544define amdgpu_kernel void @test_call_external_void_func_v8i32() #0 { 545 %ptr = load <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef 546 %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr 547 call void @external_void_func_v8i32(<8 x i32> %val) 548 ret void 549} 550 551; GCN-LABEL: {{^}}test_call_external_void_func_v8i32_imm: 552; GCN-DAG: v_mov_b32_e32 v0, 1 553; GCN-DAG: v_mov_b32_e32 v1, 2 554; GCN-DAG: 
v_mov_b32_e32 v2, 3 555; GCN-DAG: v_mov_b32_e32 v3, 4 556; GCN-DAG: v_mov_b32_e32 v4, 5 557; GCN-DAG: v_mov_b32_e32 v5, 6 558; GCN-DAG: v_mov_b32_e32 v6, 7 559; GCN-DAG: v_mov_b32_e32 v7, 8 560; GCN: s_swappc_b64 561define amdgpu_kernel void @test_call_external_void_func_v8i32_imm() #0 { 562 call void @external_void_func_v8i32(<8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>) 563 ret void 564} 565 566; GCN-LABEL: {{^}}test_call_external_void_func_v16i32: 567; GCN-DAG: buffer_load_dwordx4 v[0:3], off 568; GCN-DAG: buffer_load_dwordx4 v[4:7], off 569; GCN-DAG: buffer_load_dwordx4 v[8:11], off 570; GCN-DAG: buffer_load_dwordx4 v[12:15], off 571; GCN-NOT: s_waitcnt 572; GCN: s_swappc_b64 573define amdgpu_kernel void @test_call_external_void_func_v16i32() #0 { 574 %ptr = load <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef 575 %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr 576 call void @external_void_func_v16i32(<16 x i32> %val) 577 ret void 578} 579 580; GCN-LABEL: {{^}}test_call_external_void_func_v32i32: 581; GCN-DAG: buffer_load_dwordx4 v[0:3], off 582; GCN-DAG: buffer_load_dwordx4 v[4:7], off 583; GCN-DAG: buffer_load_dwordx4 v[8:11], off 584; GCN-DAG: buffer_load_dwordx4 v[12:15], off 585; GCN-DAG: buffer_load_dwordx4 v[16:19], off 586; GCN-DAG: buffer_load_dwordx4 v[20:23], off 587; GCN-DAG: buffer_load_dwordx4 v[24:27], off 588; GCN-DAG: buffer_load_dwordx4 v[28:31], off 589; GCN: buffer_store_dword v31, off, s{{\[[0-9]+:[0-9]+\]}}, s32 590; GCN: s_swappc_b64 591define amdgpu_kernel void @test_call_external_void_func_v32i32() #0 { 592 %ptr = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef 593 %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr 594 call void @external_void_func_v32i32(<32 x i32> %val) 595 ret void 596} 597 598; GCN-LABEL: {{^}}test_call_external_void_func_v32i32_i32: 599; HSA-NOT: s_add_u32 s32 600 601; MESA-NOT: s_add_u32 s32 602 603; GCN-DAG: buffer_load_dword 
[[VAL1:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}} 604; GCN-DAG: buffer_load_dwordx4 v[0:3], off 605; GCN-DAG: buffer_load_dwordx4 v[4:7], off 606; GCN-DAG: buffer_load_dwordx4 v[8:11], off 607; GCN-DAG: buffer_load_dwordx4 v[12:15], off 608; GCN-DAG: buffer_load_dwordx4 v[16:19], off 609; GCN-DAG: buffer_load_dwordx4 v[20:23], off 610; GCN-DAG: buffer_load_dwordx4 v[24:27], off 611; GCN-DAG: buffer_load_dwordx4 v[28:31], off 612 613; GCN: s_waitcnt 614; GCN-DAG: buffer_store_dword [[VAL1]], off, s[{{[0-9]+}}:{{[0-9]+}}], s32 offset:4{{$}} 615; GCN-DAG: buffer_store_dword v31, off, s[{{[0-9]+}}:{{[0-9]+}}], s32{{$}} 616; GCN: s_swappc_b64 617; GCN-NEXT: s_endpgm 618define amdgpu_kernel void @test_call_external_void_func_v32i32_i32(i32) #0 { 619 %ptr0 = load <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef 620 %val0 = load <32 x i32>, <32 x i32> addrspace(1)* %ptr0 621 %val1 = load i32, i32 addrspace(1)* undef 622 call void @external_void_func_v32i32_i32(<32 x i32> %val0, i32 %val1) 623 ret void 624} 625 626; GCN-LABEL: {{^}}test_call_external_i32_func_i32_imm: 627; GCN: v_mov_b32_e32 v0, 42 628; GCN: s_swappc_b64 s[30:31], 629; GCN-NOT: s_waitcnt 630; GCN: buffer_store_dword v0, off, s[36:39], 0 631define amdgpu_kernel void @test_call_external_i32_func_i32_imm(i32 addrspace(1)* %out) #0 { 632 %val = call i32 @external_i32_func_i32(i32 42) 633 store volatile i32 %val, i32 addrspace(1)* %out 634 ret void 635} 636 637; GCN-LABEL: {{^}}test_call_external_void_func_struct_i8_i32: 638; GCN: buffer_load_ubyte v0, off 639; GCN: buffer_load_dword v1, off 640; GCN: s_swappc_b64 641define amdgpu_kernel void @test_call_external_void_func_struct_i8_i32() #0 { 642 %ptr0 = load { i8, i32 } addrspace(1)*, { i8, i32 } addrspace(1)* addrspace(4)* undef 643 %val = load { i8, i32 }, { i8, i32 } addrspace(1)* %ptr0 644 call void @external_void_func_struct_i8_i32({ i8, i32 } %val) 645 ret void 646} 647 648; GCN-LABEL: 
{{^}}test_call_external_void_func_byval_struct_i8_i32: 649; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3 650; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8 651; MESA-DAG: buffer_store_byte [[VAL0]], off, s[36:39], 0 offset:8 652; MESA-DAG: buffer_store_dword [[VAL1]], off, s[36:39], 0 offset:12 653 654; HSA-DAG: buffer_store_byte [[VAL0]], off, s[0:3], 0 offset:8 655; HSA-DAG: buffer_store_dword [[VAL1]], off, s[0:3], 0 offset:12 656 657; HSA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[0:3], 0 offset:12 658; HSA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[0:3], 0 offset:8 659 660; MESA: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s[36:39], 0 offset:12 661; MESA: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s[36:39], 0 offset:8 662 663; GCN-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x400{{$}} 664 665; HSA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[0:3], [[SP]]{{$}} 666; HSA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[0:3], [[SP]] offset:4 667 668; MESA-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s[36:39], [[SP]]{{$}} 669; MESA-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s[36:39], [[SP]] offset:4 670 671; GCN-NEXT: s_swappc_b64 672; GCN-NOT: [[SP]] 673define amdgpu_kernel void @test_call_external_void_func_byval_struct_i8_i32() #0 { 674 %val = alloca { i8, i32 }, align 4, addrspace(5) 675 %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 0 676 %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %val, i32 0, i32 1 677 store i8 3, i8 addrspace(5)* %gep0 678 store i32 8, i32 addrspace(5)* %gep1 679 call void @external_void_func_byval_struct_i8_i32({ i8, i32 } addrspace(5)* byval({ i8, i32 }) %val) 680 ret void 681} 682 683; GCN-LABEL: {{^}}test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32: 684; GCN-DAG: s_movk_i32 [[SP:s[0-9]+]], 0x800{{$}} 685 686; GCN-DAG: v_mov_b32_e32 [[VAL0:v[0-9]+]], 3 687; GCN-DAG: v_mov_b32_e32 [[VAL1:v[0-9]+]], 8 688; GCN-DAG: buffer_store_byte [[VAL0]], 
off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 689; GCN-DAG: buffer_store_dword [[VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 690 691; GCN-DAG: buffer_load_dword [[RELOAD_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:8 692; GCN-DAG: buffer_load_dword [[RELOAD_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:12 693 694; GCN-NOT: s_add_u32 [[SP]] 695; GCN-DAG: buffer_store_dword [[RELOAD_VAL0]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]]{{$}} 696; GCN-DAG: buffer_store_dword [[RELOAD_VAL1]], off, s{{\[[0-9]+:[0-9]+\]}}, [[SP]] offset:4 697; GCN: s_swappc_b64 698; GCN-DAG: buffer_load_ubyte [[LOAD_OUT_VAL0:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16 699; GCN-DAG: buffer_load_dword [[LOAD_OUT_VAL1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:20 700; GCN-NOT: s_sub_u32 [[SP]] 701 702; GCN: buffer_store_byte [[LOAD_OUT_VAL0]], off 703; GCN: buffer_store_dword [[LOAD_OUT_VAL1]], off 704define amdgpu_kernel void @test_call_external_void_func_sret_struct_i8_i32_byval_struct_i8_i32(i32) #0 { 705 %in.val = alloca { i8, i32 }, align 4, addrspace(5) 706 %out.val = alloca { i8, i32 }, align 4, addrspace(5) 707 %in.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 0 708 %in.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %in.val, i32 0, i32 1 709 store i8 3, i8 addrspace(5)* %in.gep0 710 store i32 8, i32 addrspace(5)* %in.gep1 711 call void @external_void_func_sret_struct_i8_i32_byval_struct_i8_i32({ i8, i32 } addrspace(5)* %out.val, { i8, i32 } addrspace(5)* byval({ i8, i32 }) %in.val) 712 %out.gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 0 713 %out.gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %out.val, i32 0, i32 1 714 %out.val0 = load i8, i8 addrspace(5)* %out.gep0 715 %out.val1 = load i32, i32 addrspace(5)* %out.gep1 716 717 store volatile i8 %out.val0, i8 addrspace(1)* undef 718 store volatile i32 %out.val1, i32 addrspace(1)* undef 
719 ret void 720} 721 722; GCN-LABEL: {{^}}test_call_external_void_func_v16i8: 723define amdgpu_kernel void @test_call_external_void_func_v16i8() #0 { 724 %ptr = load <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef 725 %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr 726 call void @external_void_func_v16i8(<16 x i8> %val) 727 ret void 728} 729 730; GCN-LABEL: {{^}}stack_passed_arg_alignment_v32i32_f64: 731; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32{{$}} 732; GCN: buffer_store_dword v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4 733; GCN: s_swappc_b64 734define amdgpu_kernel void @stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 { 735entry: 736 call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp) 737 ret void 738} 739 740; GCN-LABEL: {{^}}tail_call_byval_align16: 741; GCN-NOT: s32 742; GCN: buffer_load_dword [[VREG2:v[0-9]+]], off, s[0:3], s32 offset:28 743; GCN: buffer_load_dword [[VREG1:v[0-9]+]], off, s[0:3], s32{{$}} 744 745; GCN: s_getpc_b64 746 747; GCN: buffer_store_dword [[VREG2]], off, s[0:3], s32 offset:20 748; GCN: buffer_load_dword [[VREG3:v[0-9]+]], off, s[0:3], s32 offset:24{{$}} 749; GCN: buffer_store_dword [[VREG1]], off, s[0:3], s32{{$}} 750; GCN: buffer_store_dword [[VREG3]], off, s[0:3], s32 offset:16{{$}} 751; GCN-NOT: s32 752; GCN: s_setpc_b64 753define void @tail_call_byval_align16(<32 x i32> %val, double %tmp) #0 { 754entry: 755 %alloca = alloca double, align 8, addrspace(5) 756 tail call void @byval_align16_f64_arg(<32 x i32> %val, double addrspace(5)* byval(double) align 16 %alloca) 757 ret void 758} 759 760; GCN-LABEL: {{^}}tail_call_stack_passed_arg_alignment_v32i32_f64: 761; GCN-NOT: s32 762; GCN-DAG: buffer_load_dword v33, off, s[0:3], s32 offset:8 763; GCN-DAG: buffer_load_dword v32, off, s[0:3], s32 offset:4 764; GCN-DAG: buffer_load_dword v31, off, s[0:3], s32{{$}} 765; GCN: s_getpc_b64 766; GCN: buffer_store_dword v31, off, s[0:3], s32{{$}} 
; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:4
; GCN: buffer_store_dword v33, off, s[0:3], s32 offset:8
; GCN-NOT: s32
; GCN: s_setpc_b64
define void @tail_call_stack_passed_arg_alignment_v32i32_f64(<32 x i32> %val, double %tmp) #0 {
entry:
  tail call void @stack_passed_f64_arg(<32 x i32> %val, double %tmp)
  ret void
}

; Passes twelve <3 x i32> constants (36 dwords in total). The checks below
; expect the trailing dwords holding 11 through 15 to be materialized and
; stored at consecutive 4-byte offsets from s32, starting at offset 0, before
; the callee address is computed with s_getpc.
; Every store offset is pinned so that a misplaced stack slot cannot slip
; through; the first slot is anchored with an end-of-line match because it
; carries no explicit offset.
; GCN-LABEL: {{^}}stack_12xv3i32:
; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11
; GCN: buffer_store_dword [[REG11]], off, s[0:3], s32{{$}}
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12
; GCN: buffer_store_dword [[REG12]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15
; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16
; GCN: s_getpc
define void @stack_12xv3i32() #0 {
entry:
  call void @external_void_func_12xv3i32(
    <3 x i32><i32 0, i32 0, i32 0>,
    <3 x i32><i32 1, i32 1, i32 1>,
    <3 x i32><i32 2, i32 2, i32 2>,
    <3 x i32><i32 3, i32 3, i32 3>,
    <3 x i32><i32 4, i32 4, i32 4>,
    <3 x i32><i32 5, i32 5, i32 5>,
    <3 x i32><i32 6, i32 6, i32 6>,
    <3 x i32><i32 7, i32 7, i32 7>,
    <3 x i32><i32 8, i32 8, i32 8>,
    <3 x i32><i32 9, i32 9, i32 9>,
    <3 x i32><i32 10, i32 11, i32 12>,
    <3 x i32><i32 13, i32 14, i32 15>)
  ret void
}

; GCN-LABEL: {{^}}stack_12xv3f32:
; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000
; GCN: buffer_store_dword [[REG11]], {{.*$}}
; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000
; GCN: buffer_store_dword [[REG12]], {{.*}} offset:4
; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000
; GCN: buffer_store_dword [[REG13]], {{.*}} offset:8
; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000
; GCN: buffer_store_dword [[REG14]], {{.*}} offset:12
; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]],
0x41700000 817; GCN: buffer_store_dword [[REG15]], {{.*}} offset:16 818; GCN: s_getpc 819define void @stack_12xv3f32() #0 { 820entry: 821 call void @external_void_func_12xv3f32( 822 <3 x float><float 0.0, float 0.0, float 0.0>, 823 <3 x float><float 1.0, float 1.0, float 1.0>, 824 <3 x float><float 2.0, float 2.0, float 2.0>, 825 <3 x float><float 3.0, float 3.0, float 3.0>, 826 <3 x float><float 4.0, float 4.0, float 4.0>, 827 <3 x float><float 5.0, float 5.0, float 5.0>, 828 <3 x float><float 6.0, float 6.0, float 6.0>, 829 <3 x float><float 7.0, float 7.0, float 7.0>, 830 <3 x float><float 8.0, float 8.0, float 8.0>, 831 <3 x float><float 9.0, float 9.0, float 9.0>, 832 <3 x float><float 10.0, float 11.0, float 12.0>, 833 <3 x float><float 13.0, float 14.0, float 15.0>) 834 ret void 835} 836 837; GCN-LABEL: {{^}}stack_8xv5i32: 838; GCN: v_mov_b32_e32 [[REG7:v[0-9]+]], 7 839; GCN: buffer_store_dword [[REG7]], {{.*$}} 840; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 8 841; GCN: buffer_store_dword [[REG8]], {{.*}} offset:4 842; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 9 843; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8 844; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 10 845; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12 846; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 11 847; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16 848; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 12 849; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20 850; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 13 851; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24 852; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 14 853; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28 854; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 15 855; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32 856; GCN: s_getpc 857define void @stack_8xv5i32() #0 { 858entry: 859 call void @external_void_func_8xv5i32( 860 <5 x i32><i32 0, i32 0, i32 0, i32 0, i32 0>, 861 <5 x i32><i32 1, i32 1, i32 1, i32 1, i32 1>, 862 <5 x i32><i32 2, i32 2, i32 2, i32 2, 
i32 2>, 863 <5 x i32><i32 3, i32 3, i32 3, i32 3, i32 3>, 864 <5 x i32><i32 4, i32 4, i32 4, i32 4, i32 4>, 865 <5 x i32><i32 5, i32 5, i32 5, i32 5, i32 5>, 866 <5 x i32><i32 6, i32 7, i32 8, i32 9, i32 10>, 867 <5 x i32><i32 11, i32 12, i32 13, i32 14, i32 15>) 868 ret void 869} 870 871; GCN-LABEL: {{^}}stack_8xv5f32: 872; GCN: v_mov_b32_e32 [[REG7:v[0-9]+]], 0x40e00000 873; GCN: buffer_store_dword [[REG7]], {{.*$}} 874; GCN: v_mov_b32_e32 [[REG8:v[0-9]+]], 0x41000000 875; GCN: buffer_store_dword [[REG8]], {{.*}} offset:4 876; GCN: v_mov_b32_e32 [[REG9:v[0-9]+]], 0x41100000 877; GCN: buffer_store_dword [[REG9]], {{.*}} offset:8 878; GCN: v_mov_b32_e32 [[REG10:v[0-9]+]], 0x41200000 879; GCN: buffer_store_dword [[REG10]], {{.*}} offset:12 880; GCN: v_mov_b32_e32 [[REG11:v[0-9]+]], 0x41300000 881; GCN: buffer_store_dword [[REG11]], {{.*}} offset:16 882; GCN: v_mov_b32_e32 [[REG12:v[0-9]+]], 0x41400000 883; GCN: buffer_store_dword [[REG12]], {{.*}} offset:20 884; GCN: v_mov_b32_e32 [[REG13:v[0-9]+]], 0x41500000 885; GCN: buffer_store_dword [[REG13]], {{.*}} offset:24 886; GCN: v_mov_b32_e32 [[REG14:v[0-9]+]], 0x41600000 887; GCN: buffer_store_dword [[REG14]], {{.*}} offset:28 888; GCN: v_mov_b32_e32 [[REG15:v[0-9]+]], 0x41700000 889; GCN: buffer_store_dword [[REG15]], {{.*}} offset:32 890; GCN: s_getpc 891define void @stack_8xv5f32() #0 { 892entry: 893 call void @external_void_func_8xv5f32( 894 <5 x float><float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, 895 <5 x float><float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, 896 <5 x float><float 2.0, float 2.0, float 2.0, float 2.0, float 2.0>, 897 <5 x float><float 3.0, float 3.0, float 3.0, float 3.0, float 3.0>, 898 <5 x float><float 4.0, float 4.0, float 4.0, float 4.0, float 4.0>, 899 <5 x float><float 5.0, float 5.0, float 5.0, float 5.0, float 5.0>, 900 <5 x float><float 6.0, float 7.0, float 8.0, float 9.0, float 10.0>, 901 <5 x float><float 11.0, float 12.0, float 13.0, float 14.0, float 15.0>) 902 
ret void 903} 904 905declare hidden void @byval_align16_f64_arg(<32 x i32>, double addrspace(5)* byval(double) align 16) #0 906declare hidden void @stack_passed_f64_arg(<32 x i32>, double) #0 907declare hidden void @external_void_func_12xv3i32(<3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, 908 <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>, <3 x i32>) #0 909declare hidden void @external_void_func_8xv5i32(<5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>, 910 <5 x i32>, <5 x i32>, <5 x i32>, <5 x i32>) #0 911declare hidden void @external_void_func_12xv3f32(<3 x float>, <3 x float>, <3 x float>, <3 x float>, 912 <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>, <3 x float>) #0 913declare hidden void @external_void_func_8xv5f32(<5 x float>, <5 x float>, <5 x float>, <5 x float>, 914 <5 x float>, <5 x float>, <5 x float>, <5 x float>) #0 915attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" } 916attributes #1 = { nounwind readnone } 917attributes #2 = { nounwind noinline } 918 919 920 921