; RUN: opt -S -mtriple=amdgcn-- -amdgpu-replace-lds-use-with-pointer -amdgpu-enable-lds-replace-with-pointer=true < %s | FileCheck %s

; DESCRIPTION:
;
; Replace lds globals used within phi instruction.
;

; Original LDS should exist.
; CHECK: @lds.1 = addrspace(3) global i32 undef, align 4
; CHECK: @lds.2 = addrspace(3) global i32 undef, align 4
@lds.1 = addrspace(3) global i32 undef, align 4
@lds.2 = addrspace(3) global i32 undef, align 4

; Pointers should be created.
; CHECK: @lds.1.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2
; CHECK: @lds.2.ptr = internal unnamed_addr addrspace(3) global i16 undef, align 2

; @f0 is a non-kernel function that uses @lds.1 and @lds.2 as incoming values
; of a phi (%my.tmp3) and uses @lds.1 inside a constant icmp expression that
; feeds another phi (%my.tmp4). The expected output loads each .ptr global in
; the entry block, rebuilds the i32 addrspace(3)* values from those offsets,
; and materializes the constant icmp as an instruction in %bb1.
define void @f0(i32 %arg) {
; CHECK-LABEL: bb:
; CHECK: %0 = load i16, i16 addrspace(3)* @lds.2.ptr, align 2
; CHECK: %1 = getelementptr i8, i8 addrspace(3)* null, i16 %0
; CHECK: %2 = bitcast i8 addrspace(3)* %1 to i32 addrspace(3)*
; CHECK: %3 = load i16, i16 addrspace(3)* @lds.1.ptr, align 2
; CHECK: %4 = getelementptr i8, i8 addrspace(3)* null, i16 %3
; CHECK: %5 = bitcast i8 addrspace(3)* %4 to i32 addrspace(3)*
; CHECK: %id = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK: %my.tmp = sub i32 %id, %arg
; CHECK: br label %bb1
bb:
  %id = call i32 @llvm.amdgcn.workitem.id.x()
  %my.tmp = sub i32 %id, %arg
  br label %bb1

; CHECK-LABEL: bb1:
; CHECK: %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
; CHECK: %6 = icmp ne i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), %5
; CHECK: %lsr.iv.next = add i32 %lsr.iv, 1
; CHECK: %cmp0 = icmp slt i32 %lsr.iv.next, 0
; CHECK: br i1 %cmp0, label %bb4, label %Flow
bb1:
  %lsr.iv = phi i32 [ undef, %bb ], [ %my.tmp2, %Flow ]
  %lsr.iv.next = add i32 %lsr.iv, 1
  %cmp0 = icmp slt i32 %lsr.iv.next, 0
  br i1 %cmp0, label %bb4, label %Flow

; CHECK-LABEL: bb4:
; CHECK: %load = load volatile i32, i32 addrspace(1)* undef, align 4
; CHECK: %cmp1 = icmp sge i32 %my.tmp, %load
; CHECK: br label %Flow
bb4:
  %load = load volatile i32, i32 addrspace(1)* undef, align 4
  %cmp1 = icmp sge i32 %my.tmp, %load
  br label %Flow

; CHECK-LABEL: Flow:
; CHECK: %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
; CHECK: %my.tmp3 = phi i32 addrspace(3)* [ %2, %bb4 ], [ %5, %bb1 ]
; CHECK: %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ %6, %bb1 ]
; CHECK: br i1 %my.tmp4, label %bb9, label %bb1
Flow:
  %my.tmp2 = phi i32 [ %lsr.iv.next, %bb4 ], [ undef, %bb1 ]
  %my.tmp3 = phi i32 addrspace(3)* [@lds.2, %bb4 ], [ @lds.1, %bb1 ]
  %my.tmp4 = phi i1 [ %cmp1, %bb4 ], [ icmp ne (i32 addrspace(3)* inttoptr (i32 4 to i32 addrspace(3)*), i32 addrspace(3)* @lds.1), %bb1 ]
  br i1 %my.tmp4, label %bb9, label %bb1

; CHECK-LABEL: bb9:
; CHECK: store volatile i32 7, i32 addrspace(3)* undef, align 4
; CHECK: ret void
bb9:
  store volatile i32 7, i32 addrspace(3)* undef
  ret void
}

; @k0 is the kernel that calls @f0. The expected output stores the addresses
; of @lds.2 and @lds.1 into their .ptr globals inside a block guarded by
; mbcnt.lo(-1, 0) == 0, then issues a wave barrier before the call to @f0.
; CHECK-LABEL: @k0
; CHECK: %1 = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0)
; CHECK: %2 = icmp eq i32 %1, 0
; CHECK: br i1 %2, label %3, label %4
;
; CHECK-LABEL: 3:
; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.2 to i16), i16 addrspace(3)* @lds.2.ptr, align 2
; CHECK: store i16 ptrtoint (i32 addrspace(3)* @lds.1 to i16), i16 addrspace(3)* @lds.1.ptr, align 2
; CHECK: br label %4
;
; CHECK-LABEL: 4:
; CHECK: call void @llvm.amdgcn.wave.barrier()
; CHECK: call void @f0(i32 %arg)
; CHECK: ret void
define amdgpu_kernel void @k0(i32 %arg) {
  call void @f0(i32 %arg)
  ret void
}

; Intrinsic called from the entry block of @f0.
declare i32 @llvm.amdgcn.workitem.id.x()