; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Codegen test for LDS (addrspace(3)) stores whose address has the form
; "constant - scaled workitem id". Each kernel pins whether the constant ends
; up in the offset field of the DS instruction or stays in the subtract.
;
; Attribute groups: #0 (nounwind readnone) is used for the workitem-id
; intrinsic and its call sites; the kernels themselves use #1 (nounwind)
; because every one of them stores to memory.

declare i32 @llvm.amdgcn.workitem.id.x() #0

@lds.obj = addrspace(3) global [256 x i32] undef, align 4

; Stores 123 to element (3 - workitem.id.x) of @lds.obj. The expected output
; subtracts the scaled id from the address of lds.obj and carries the constant
; 3 * 4 = 12 bytes in the offset field of the store.
; GCN-LABEL: {{^}}write_ds_sub0_offset0_global:
; GCN: v_lshlrev_b32_e32 [[SHL:v[0-9]+]], 2, v0
; GCN: v_sub_{{[iu]}}32_e32 [[BASEPTR:v[0-9]+]], {{(vcc, )?}}lds.obj@abs32@lo, [[SHL]]
; GCN: v_mov_b32_e32 [[VAL:v[0-9]+]], 0x7b
; GCN: ds_write_b32 [[BASEPTR]], [[VAL]] offset:12
define amdgpu_kernel void @write_ds_sub0_offset0_global() #1 {
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  ret void
}

; Same addressing as the kernel above, followed by a div.fmas call whose
; result is stored (volatile) through a null global pointer. Only the gfx900
; run inspects this kernel, and only for a v_sub_u32 and the end of program.
; NOTE(review): the name suggests this guards the clamp operand of the
; subtract under -verify-machineinstrs; confirm against the originating commit.
; GFX9-LABEL: {{^}}write_ds_sub0_offset0_global_clamp_bit:
; GFX9: v_sub_u32
; GFX9: s_endpgm
define amdgpu_kernel void @write_ds_sub0_offset0_global_clamp_bit(float %dummy.val) #1 {
entry:
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %sub1 = sub i32 0, %x.i
  %tmp0 = getelementptr [256 x i32], [256 x i32] addrspace(3)* @lds.obj, i32 0, i32 %sub1
  %arrayidx = getelementptr inbounds i32, i32 addrspace(3)* %tmp0, i32 3
  store i32 123, i32 addrspace(3)* %arrayidx
  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
  store volatile float %fmas, float addrspace(1)* null
  ret void
}

; Byte store to address 65535 + ((0 - id) << 2). The expected output keeps
; 65535 (0xffff, the "max offset" of the test name) in the offset field and
; computes the base as 0 - (id << 2).
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]] offset:65535
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65535, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; As above with the constant raised by one to 65536 (0x10000). Here the
; expected output leaves the constant in the subtract and emits the store
; with no offset field at all (the pattern is anchored at end of line).
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x10000, [[SCALED]]
; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0x10000, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN: ds_write_b8 [[NEG]], [[K]]{{$}}
define amdgpu_kernel void @add_x_shl_neg_to_sub_max_offset_p1() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 65536, %shl
  %ptr = inttoptr i32 %add to i8 addrspace(3)*
  store i8 13, i8 addrspace(3)* %ptr
  ret void
}

; The shifted negated id feeds two different adds (123 and 456), each used by
; a volatile i32 store. The expected output has a single subtract shared by
; both stores, with each constant in its own offset field.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:456{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add0 = add i32 123, %shl
  %add1 = add i32 456, %shl
  %ptr0 = inttoptr i32 %add0 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr0
  %ptr1 = inttoptr i32 %add1 to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr1
  ret void
}

; One add (123) whose pointer is stored through twice with volatile stores.
; The expected output again has a single subtract and two stores that both
; carry offset 123.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_multi_use_same_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]]
; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 13
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN-NOT: v_sub
; GCN: ds_write_b32 [[NEG]], [[K]] offset:123{{$}}
; GCN: s_endpgm
define amdgpu_kernel void @add_x_shl_neg_to_sub_multi_use_same_offset() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 123, %shl
  %ptr = inttoptr i32 %add to i32 addrspace(3)*
  store volatile i32 13, i32 addrspace(3)* %ptr
  store volatile i32 13, i32 addrspace(3)* %ptr
  ret void
}

; i64 store with only 4-byte alignment to address 1019 + ((0 - id) << 2).
; The expected output is a two-dword write with element offsets 254 and 255
; on a base of 0 - (id << 2).
; NOTE(review): if the write2 offsets are in 4-byte units (see the ISA
; manual), 254/255 address bytes 1016/1020 from the base, which does not
; equal the 1019 added in the IR. These expectations may record an offset
; truncation in the compiler revision they were written against; re-check
; them against current llc output before relying on this test.
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0, [[SCALED]]
; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset0:254 offset1:255
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

; Same under-aligned i64 store as the kernel above, followed by a div.fmas
; call whose result is stored (volatile) through a null global pointer. Only
; the gfx900 run inspects it, for a v_sub_u32 and the end of program.
; NOTE(review): presumably the same clamp-operand verifier coverage as the
; other *_clamp_bit kernel; confirm.
; GFX9-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit:
; GFX9: v_sub_u32
; GFX9: s_endpgm
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_clamp_bit(float %dummy.val) #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1019, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  %fmas = call float @llvm.amdgcn.div.fmas.f32(float %dummy.val, float %dummy.val, float %dummy.val, i1 false)
  store volatile float %fmas, float addrspace(1)* null
  ret void
}

; Under-aligned i64 store with the constant raised to 1020 (0x3fc). The
; expected output keeps 0x3fc in the subtract and uses element offsets 0 and 1
; on the two-dword write (only offset1:1 is printed, anchored at end of line).
; GCN-LABEL: {{^}}add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1:
; GCN-DAG: v_lshlrev_b32_e32 [[SCALED:v[0-9]+]], 2, v0
; CI-DAG: v_sub_i32_e32 [[NEG:v[0-9]+]], vcc, 0x3fc, [[SCALED]]
; GFX9-DAG: v_sub_u32_e32 [[NEG:v[0-9]+]], 0x3fc, [[SCALED]]
; GCN: ds_write2_b32 [[NEG]], {{v[0-9]+}}, {{v[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @add_x_shl_neg_to_sub_misaligned_i64_max_offset_p1() #1 {
  %x.i = call i32 @llvm.amdgcn.workitem.id.x() #0
  %neg = sub i32 0, %x.i
  %shl = shl i32 %neg, 2
  %add = add i32 1020, %shl
  %ptr = inttoptr i32 %add to i64 addrspace(3)*
  store i64 123, i64 addrspace(3)* %ptr, align 4
  ret void
}

declare float @llvm.amdgcn.div.fmas.f32(float, float, float, i1)

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind convergent }