; Checks how address computations that are defined in the entry block but only
; used in a conditionally executed block are handled, for several address
; spaces and constant offsets.
;
; The three opt invocations check the IR produced by -codegenprepare and the
; three llc invocations check the final ISA, each for tahiti, bonaire and tonga.
; Every test branches on the result of llvm.amdgcn.mbcnt.lo so that the memory
; access sits in its own basic block.

; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tahiti < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-SI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-CI %s
; RUN: opt -S -codegenprepare -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck -check-prefix=OPT -check-prefix=OPT-VI %s
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-promote-alloca -amdgpu-sroa=0 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Global (addrspace 1) i32 load through a GEP of %in with constant index 7.
; The GEP is defined in %entry and used only by the load in %if.

; OPT-LABEL: @test_sink_global_small_offset_i32(
; OPT-CI-NOT: getelementptr i32, i32 addrspace(1)* %in
; OPT-VI: getelementptr i32, i32 addrspace(1)* %in
; OPT: br i1
; OPT-CI: ptrtoint

; GCN-LABEL: {{^}}test_sink_global_small_offset_i32:
; GCN: {{^}}BB0_2:
define void @test_sink_global_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 65535. The IR check expects the GEP to still be
; present ahead of the branch; the ISA check expects the offset in a scalar
; register operand of the buffer load.

; OPT-LABEL: @test_sink_global_small_max_i32_ds_offset(
; OPT: %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_global_small_max_i32_ds_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB1_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_i32_ds_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 65535
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 4095. The ISA check expects the offset to be
; encoded as the immediate offset:4095 on the buffer load.

; GCN-LABEL: {{^}}test_sink_global_small_max_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0 offset:4095{{$}}
; GCN: {{^}}BB2_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Global i8 load at byte offset 4096, one past the previous test. The ISA check
; expects a scalar register operand and no immediate offset on the buffer load.

; GCN-LABEL: {{^}}test_sink_global_small_max_plus_1_mubuf_offset:
; GCN: s_and_saveexec_b64
; GCN: buffer_load_sbyte {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, s{{[0-9]+$}}
; GCN: {{^}}BB3_2:
; GCN: s_or_b64 exec
define void @test_sink_global_small_max_plus_1_mubuf_offset(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 99999
  %in.gep = getelementptr i8, i8 addrspace(1)* %in, i64 4096
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i8, i8 addrspace(1)* %in.gep
  %tmp2 = sext i8 %tmp1 to i32
  br label %endif

endif:
  %x = phi i32 [ %tmp2, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Volatile store and loads of an alloca'd [512 x i32] at element index 1023.
; The IR check expects no GEP of the array type before the branch and a
; ptrtoint after it; the ISA check expects immediate offset:4092 on the
; buffer store and load.

; OPT-LABEL: @test_sink_scratch_small_offset_i32(
; OPT-NOT: getelementptr [512 x i32]
; OPT: br i1
; OPT: ptrtoint

; GCN-LABEL: {{^}}test_sink_scratch_small_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, off, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:4092{{$}}
; GCN: {{^}}BB4_2:
define void @test_sink_scratch_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1023
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Same shape as the previous test with element index 1024. The IR check
; expects the GEP to remain before the branch with no ptrtoint after it; the
; ISA check expects offen addressing on the buffer store and load.

; OPT-LABEL: @test_no_sink_scratch_large_offset_i32(
; OPT: %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
; OPT: br i1
; OPT-NOT: ptrtoint

; GCN-LABEL: {{^}}test_no_sink_scratch_large_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: buffer_store_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: buffer_load_dword {{v[0-9]+}}, {{v[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN: {{^}}BB5_2:
define void @test_no_sink_scratch_large_offset_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %arg) {
entry:
  %alloca = alloca [512 x i32], align 4
  %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i64 999998
  %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %add.arg = add i32 %arg, 8
  %alloca.gep = getelementptr [512 x i32], [512 x i32]* %alloca, i32 0, i32 1024
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  store volatile i32 123, i32* %alloca.gep
  %tmp1 = load volatile i32, i32* %alloca.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep.0
  %load = load volatile i32, i32* %alloca.gep
  store i32 %load, i32 addrspace(1)* %out.gep.1
  br label %done

done:
  ret void
}

; Global i32 load indexed by a zero-extended i32 argument instead of a
; constant. Only the ISA is checked.

; GCN-LABEL: {{^}}test_sink_global_vreg_sreg_i32:
; GCN: s_and_saveexec_b64
; CI: buffer_load_dword {{v[0-9]+}}, {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; VI: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}]
; GCN: {{^}}BB6_2:
define void @test_sink_global_vreg_sreg_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in, i32 %offset) {
entry:
  %offset.ext = zext i32 %offset to i64
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(1)* %in, i64 %offset.ext
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(1)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Constant address space (addrspace 2) i32 load at element index 7.

; OPT-LABEL: @test_sink_constant_small_offset_i32(
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_small_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x7{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_small_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 7
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Constant address space i32 load at element index 255; the tahiti ISA check
; expects the immediate 0xff on s_load_dword.

; OPT-LABEL: @test_sink_constant_max_8_bit_offset_i32(
; OPT-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xff{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 255
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Constant address space i32 load at element index 256. The tahiti checks
; expect the GEP to remain in the IR and the byte offset 0x400 to be
; materialized in a scalar register; the bonaire and tonga IR checks expect
; no GEP before the branch.

; OPT-LABEL: @test_sink_constant_max_8_bit_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_8_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_movk_i32 [[OFFSET:s[0-9]+]], 0x400

; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_8_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 256
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Constant address space i32 load at element index 4294967295. The ISA check
; expects the offset to be added to the base pointer with s_add_u32 and
; s_addc_u32 (immediates -4 and 3).

; OPT-LABEL: @test_sink_constant_max_32_bit_offset_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, -4{{$}}
; GCN: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, 3{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 4294967295
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Constant address space i32 load at element index 17179869181. The IR check
; expects the GEP to remain before the branch on every checked target.

; OPT-LABEL: @test_sink_constant_max_32_bit_offset_p1_i32(
; OPT: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_32_bit_offset_p1_i32:
; GCN: s_and_saveexec_b64
; GCN: s_add_u32
; GCN: s_addc_u32
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_32_bit_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 17179869181
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Constant address space i32 load at element index 262143 (byte offset
; 0xffffc). Only the ISA is checked, with a different expected operand form
; for each of the three targets.

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0xffffc{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x3ffff{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0xffffc{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262143
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

; Constant address space i32 load at element index 262144 (byte offset
; 0x100000). The tahiti and tonga checks expect the GEP to remain in the IR
; and the offset in a scalar register; the bonaire checks expect no GEP before
; the branch and the immediate 0x40000.

; OPT-LABEL: @test_sink_constant_max_20_bit_byte_offset_p1_i32(
; OPT-SI: getelementptr i32, i32 addrspace(2)*
; OPT-CI-NOT: getelementptr i32, i32 addrspace(2)*
; OPT-VI: getelementptr i32, i32 addrspace(2)*
; OPT: br i1

; GCN-LABEL: {{^}}test_sink_constant_max_20_bit_byte_offset_p1_i32:
; GCN: s_and_saveexec_b64
; SI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; SI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; CI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, 0x40000{{$}}

; VI: s_mov_b32 [[OFFSET:s[0-9]+]], 0x100000{{$}}
; VI: s_load_dword s{{[0-9]+}}, {{s\[[0-9]+:[0-9]+\]}}, [[OFFSET]]{{$}}

; GCN: s_or_b64 exec, exec
define void @test_sink_constant_max_20_bit_byte_offset_p1_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %out.gep = getelementptr i32, i32 addrspace(1)* %out, i64 999999
  %in.gep = getelementptr i32, i32 addrspace(2)* %in, i64 262144
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %tid, 0
  br i1 %tmp0, label %endif, label %if

if:
  %tmp1 = load i32, i32 addrspace(2)* %in.gep
  br label %endif

endif:
  %x = phi i32 [ %tmp1, %if ], [ 0, %entry ]
  store i32 %x, i32 addrspace(1)* %out.gep
  br label %done

done:
  ret void
}

%struct.foo = type { [3 x float], [3 x float] }

; OPT-LABEL: @sink_ds_address(
; OPT: ptrtoint %struct.foo addrspace(3)* %ptr to i64

; GCN-LABEL: {{^}}sink_ds_address:
; GCN: s_load_dword [[SREG1:s[0-9]+]],
; GCN: v_mov_b32_e32 [[VREG1:v[0-9]+]], [[SREG1]]
; GCN-DAG: ds_read2_b32 v[{{[0-9]+:[0-9]+}}], [[VREG1]] offset0:3 offset1:5
; Loads two floats from the second array of a %struct.foo in addrspace(3)
; (elements 0 and 2). Both addresses are computed in the entry block and
; consumed in the following block.
define void @sink_ds_address(%struct.foo addrspace(3)* nocapture %ptr) nounwind {
entry:
  %elt0.addr = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 0
  %elt2.addr = getelementptr inbounds %struct.foo, %struct.foo addrspace(3)* %ptr, i32 0, i32 1, i32 2
  br label %compare

compare:
  %elt0 = load float, float addrspace(3)* %elt0.addr, align 4
  %elt2 = load float, float addrspace(3)* %elt2.addr, align 4
  %differs = fcmp one float %elt0, %elt2
  br i1 %differs, label %ne, label %eq

eq:
  unreachable

ne:
  unreachable
}

; The byte offset used here (4095) is not a multiple of 4. It is a valid
; mubuf offset, but not a valid smrd offset.

; OPT-LABEL: @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(
; OPT: br i1 %tmp0,
; OPT: if:
; OPT: %sunkaddr = ptrtoint i8 addrspace(2)* %in to i64
; OPT: %sunkaddr1 = add i64 %sunkaddr, 4095
define void @test_sink_constant_small_max_mubuf_offset_load_i32_align_1(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %dst = getelementptr i32, i32 addrspace(1)* %out, i32 1024
  %src.byte = getelementptr i8, i8 addrspace(2)* %in, i64 4095
  %lane = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %tmp0 = icmp eq i32 %lane, 0
  br i1 %tmp0, label %join, label %if

if:
  ; Reinterpret the byte pointer as an i32 pointer and load it with align 1.
  %src.word = bitcast i8 addrspace(2)* %src.byte to i32 addrspace(2)*
  %loaded = load i32, i32 addrspace(2)* %src.word, align 1
  br label %join

join:
  %result = phi i32 [ %loaded, %if ], [ 0, %entry ]
  store i32 %result, i32 addrspace(1)* %dst
  br label %exit

exit:
  ret void
}

; Intrinsic whose result is compared against 0 to form the branch condition in
; the tests of this file.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }