; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s

; Return-value lowering test. Almost every function below loads a value and
; returns it; the checks pin which VGPRs (v0, v1, ...) hold the loaded result
; and that the function finishes with s_setpc_b64. A few functions at the end
; instead cover results written to memory through a pointer and sret arguments.

;------------------------------------------------------------------------------
; Scalar integer returns, plain and with zeroext/signext.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}i1_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt
; GCN-NEXT: s_setpc_b64
define i1 @i1_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; The checks below expect no instruction between the wait and the return, so
; nothing masks the loaded byte down to a single bit.
; FIXME: Missing and?
; GCN-LABEL: {{^}}i1_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i1 @i1_zeroext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; The signext variant is expected to sign-extend bit 0 with v_bfe_i32.
; GCN-LABEL: {{^}}i1_signext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
; GCN-NEXT: s_setpc_b64
define signext i1 @i1_signext_func_void() #0 {
  %val = load i1, i1 addrspace(1)* undef
  ret i1 %val
}

; GCN-LABEL: {{^}}i8_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i8 @i8_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i8_zeroext_func_void:
; GCN: buffer_load_ubyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i8 @i8_zeroext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; signext is expected to be satisfied by the signed byte load itself.
; GCN-LABEL: {{^}}i8_signext_func_void:
; GCN: buffer_load_sbyte v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i8 @i8_signext_func_void() #0 {
  %val = load i8, i8 addrspace(1)* undef
  ret i8 %val
}

; GCN-LABEL: {{^}}i16_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i16 @i16_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i16_zeroext_func_void:
; GCN: buffer_load_ushort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define zeroext i16 @i16_zeroext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; signext is expected to be satisfied by the signed short load itself.
; GCN-LABEL: {{^}}i16_signext_func_void:
; GCN: buffer_load_sshort v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define signext i16 @i16_signext_func_void() #0 {
  %val = load i16, i16 addrspace(1)* undef
  ret i16 %val
}

; GCN-LABEL: {{^}}i32_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i32 @i32_func_void() #0 {
  %val = load i32, i32 addrspace(1)* undef
  ret i32 %val
}

; i48 is expected to come back as a dword in v0 plus a short in v1.
; GCN-LABEL: {{^}}i48_func_void:
; GCN: buffer_load_dword v0, off
; GCN-NEXT: buffer_load_ushort v1, off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i48 @i48_func_void() #0 {
  %val = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %val
}

; GCN-LABEL: {{^}}i64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i64 @i64_func_void() #0 {
  %val = load i64, i64 addrspace(1)* undef
  ret i64 %val
}

; i65 is expected to come back as v[0:1] plus one extra byte in v2.
; GCN-LABEL: {{^}}i65_func_void:
; GCN-DAG: buffer_load_dwordx2 v[0:1], off
; GCN-DAG: buffer_load_ubyte v2, off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define i65 @i65_func_void() #0 {
  %val = load i65, i65 addrspace(1)* undef
  ret i65 %val
}

;------------------------------------------------------------------------------
; Floating-point scalar and vector returns.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}f32_func_void:
; GCN: buffer_load_dword v0, off, s[4:7], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define float @f32_func_void() #0 {
  %val = load float, float addrspace(1)* undef
  ret float %val
}

; GCN-LABEL: {{^}}f64_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define double @f64_func_void() #0 {
  %val = load double, double addrspace(1)* undef
  ret double %val
}

; GCN-LABEL: {{^}}v2f64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x double> @v2f64_func_void() #0 {
  %val = load <2 x double>, <2 x double> addrspace(1)* undef
  ret <2 x double> %val
}

;------------------------------------------------------------------------------
; Vectors of 32-bit integers.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v2i32_func_void:
; GCN: buffer_load_dwordx2 v[0:1], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i32> @v2i32_func_void() #0 {
  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %val
}

; GCN-LABEL: {{^}}v3i32_func_void:
; GCN: buffer_load_dwordx3 v[0:2], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i32> @v3i32_func_void() #0 {
  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %val
}

; GCN-LABEL: {{^}}v4i32_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i32> @v4i32_func_void() #0 {
  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %val
}

; GCN-LABEL: {{^}}v5i32_func_void:
; GCN-DAG: buffer_load_dword v4, off
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i32> @v5i32_func_void() #0 {
  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %val
}

; From here on, most tests first load the source pointer with a volatile load
; and then load the returned value through that pointer.

; GCN-LABEL: {{^}}v8i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i32> @v8i32_func_void() #0 {
  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
  ret <8 x i32> %val
}

; GCN-LABEL: {{^}}v16i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i32> @v16i32_func_void() #0 {
  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
  ret <16 x i32> %val
}

; GCN-LABEL: {{^}}v32i32_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <32 x i32> @v32i32_func_void() #0 {
  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
  ret <32 x i32> %val
}

;------------------------------------------------------------------------------
; Vectors of 64-bit integers.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v2i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <2 x i64> @v2i64_func_void() #0 {
  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %val
}

; GCN-LABEL: {{^}}v3i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <3 x i64> @v3i64_func_void() #0 {
  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
  ret <3 x i64> %val
}

; GCN-LABEL: {{^}}v4i64_func_void:
; GCN: buffer_load_dwordx4 v[0:3], off
; GCN: buffer_load_dwordx4 v[4:7], off
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <4 x i64> @v4i64_func_void() #0 {
  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
  ret <4 x i64> %val
}

; GCN-LABEL: {{^}}v5i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <5 x i64> @v5i64_func_void() #0 {
  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
  ret <5 x i64> %val
}

; GCN-LABEL: {{^}}v8i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <8 x i64> @v8i64_func_void() #0 {
  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
  ret <8 x i64> %val
}

; GCN-LABEL: {{^}}v16i64_func_void:
; GCN-DAG: buffer_load_dwordx4 v[0:3], off
; GCN-DAG: buffer_load_dwordx4 v[4:7], off
; GCN-DAG: buffer_load_dwordx4 v[8:11], off
; GCN-DAG: buffer_load_dwordx4 v[12:15], off
; GCN-DAG: buffer_load_dwordx4 v[16:19], off
; GCN-DAG: buffer_load_dwordx4 v[20:23], off
; GCN-DAG: buffer_load_dwordx4 v[24:27], off
; GCN-DAG: buffer_load_dwordx4 v[28:31], off
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define <16 x i64> @v16i64_func_void() #0 {
  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
  ret <16 x i64> %val
}

;------------------------------------------------------------------------------
; Vectors of 16-bit elements. Only the gfx900 run verifies the function bodies
; in this group; the other two runs match just the function label.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v2i16_func_void:
; GFX9: buffer_load_dword v0, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <2 x i16> @v2i16_func_void() #0 {
  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %val
}

; GCN-LABEL: {{^}}v3i16_func_void:
; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <3 x i16> @v3i16_func_void() #0 {
  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %val
}

; GCN-LABEL: {{^}}v4i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x i16> @v4i16_func_void() #0 {
  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %val
}

; GCN-LABEL: {{^}}v4f16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1], off
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <4 x half> @v4f16_func_void() #0 {
  %val = load <4 x half>, <4 x half> addrspace(1)* undef
  ret <4 x half> %val
}

; FIXME: Mixing buffer and global
; FIXME: Should not scalarize
; GCN-LABEL: {{^}}v5i16_func_void:
; GFX9: buffer_load_dwordx2 v[0:1]
; GFX9-NEXT: global_load_short_d16 v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define <5 x i16> @v5i16_func_void() #0 {
  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
  ret <5 x i16> %val
}

; GCN-LABEL: {{^}}v8i16_func_void:
; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <8 x i16> @v8i16_func_void() #0 {
  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
  ret <8 x i16> %val
}

; GCN-LABEL: {{^}}v16i16_func_void:
; GFX9: buffer_load_dwordx4 v[0:3], off
; GFX9: buffer_load_dwordx4 v[4:7], off
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <16 x i16> @v16i16_func_void() #0 {
  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
  ret <16 x i16> %val
}

;------------------------------------------------------------------------------
; Vectors of 8-bit elements. The checks expect one register per element.
;------------------------------------------------------------------------------

; FIXME: Should pack
; GCN-LABEL: {{^}}v16i8_func_void:
; GCN-DAG: v12
; GCN-DAG: v13
; GCN-DAG: v14
; GCN-DAG: v15
define <16 x i8> @v16i8_func_void() #0 {
  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
  ret <16 x i8> %val
}

; FIXME: Should pack
; GCN-LABEL: {{^}}v4i8_func_void:
; GCN: buffer_load_dword v0
; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
; GCN: s_setpc_b64
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}

;------------------------------------------------------------------------------
; Small aggregates: returned directly, and written through an sret pointer.
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}struct_i8_i32_func_void:
; GCN-DAG: buffer_load_dword v1
; GCN-DAG: buffer_load_ubyte v0
; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %val
}

; With an explicit sret argument, both fields are expected to be stored
; relative to the pointer passed in v0 (field 1 at byte offset 4).
; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s33 offen{{$}}
; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s33 offen offset:4{{$}}
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
  %val0 = load volatile i8, i8 addrspace(1)* undef
  %val1 = load volatile i32, i32 addrspace(1)* undef
  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %val0, i8 addrspace(5)* %gep0
  store i32 %val1, i32 addrspace(5)* %gep1
  ret void
}

;------------------------------------------------------------------------------
; Results of 33 dwords. The gfx900 checks expect every dword to be stored to
; memory addressed by v0 instead of being left in return registers.
;------------------------------------------------------------------------------

; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
; AssertZext inserted. Not using it introduces the spills.

; GCN-LABEL: {{^}}v33i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define <33 x i32> @v33i32_func_void() #0 {
  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
  ret <33 x i32> %val
}

; Vector first, scalar last: the expected store offsets run 0..128 in steps
; of 4, the same as for the 33-element vector.
; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 }%val
}

; Scalar first, vector last: the scalar is expected at offset 0 and the
; vector elements at offsets 128..252, leaving offsets 4..124 unwritten.
; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:132{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:136{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:140{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:144{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:148{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:152{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:156{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:160{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:164{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:168{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:172{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:176{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:180{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:184{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:188{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:192{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:196{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:200{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:204{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:208{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:212{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:216{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:220{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:224{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:228{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:232{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:236{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:240{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:244{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:248{{$}}
; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:252{{$}}
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> }%val
}

;------------------------------------------------------------------------------
; A 3-element vector followed by a scalar must not leave a register gap.
;------------------------------------------------------------------------------

; Make sure the last struct component is returned in v3, not v4.
; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile i32, i32 addrspace(3)* undef
  %load1 = load volatile i32, i32 addrspace(3)* undef
  %load2 = load volatile i32, i32 addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x i32>, i32 } %insert.4
}

; Same layout as the test above, with float vector elements.
; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
; GCN: ds_read_b32 v0,
; GCN: ds_read_b32 v1,
; GCN: ds_read_b32 v2,
; GCN: ds_read_b32 v3,
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
  %load0 = load volatile float, float addrspace(3)* undef
  %load1 = load volatile float, float addrspace(3)* undef
  %load2 = load volatile float, float addrspace(3)* undef
  %load3 = load volatile i32, i32 addrspace(3)* undef

  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
  ret { <3 x float>, i32 } %insert.4
}

;------------------------------------------------------------------------------
; Known-zero high bits of an sret pointer.
;------------------------------------------------------------------------------

; The sret pointer is shifted right by 16, 17 and 18 and each result is stored.
; The checks expect a real shift only for the shift by 16; the other two
; stores are expected to write a constant 0. Presumably the backend treats
; bits 17 and above of the pointer as known zero -- confirm against the
; lowering code before relying on the exact bound.
; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]

; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 {
  %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32

  %lshr0 = lshr i32 %arg0.int, 16
  %lshr1 = lshr i32 %arg0.int, 17
  %lshr2 = lshr i32 %arg0.int, 18

  store volatile i32 %lshr0, i32 addrspace(3)* undef
  store volatile i32 %lshr1, i32 addrspace(3)* undef
  store volatile i32 %lshr2, i32 addrspace(3)* undef
  ret void
}

attributes #0 = { nounwind }