; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10,FUNC %s

; FUNC-LABEL: {{^}}s_add_i32:
; GCN: s_add_i32 s[[REG:[0-9]+]], {{s[0-9]+, s[0-9]+}}
; GCN: v_mov_b32_e32 v[[V_REG:[0-9]+]], s[[REG]]
; GCN: buffer_store_{{dword|b32}} v[[V_REG]],
define amdgpu_kernel void @s_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %a = load i32, i32 addrspace(1)* %in
  %b = load i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v2i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
  %a = load <2 x i32>, <2 x i32> addrspace(1)* %in
  %b = load <2 x i32>, <2 x i32> addrspace(1)* %b_ptr
  %result = add <2 x i32> %a, %b
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v4i32:
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
; GCN: s_add_i32 s{{[0-9]+, s[0-9]+, s[0-9]+}}
define amdgpu_kernel void @s_add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  %b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
  %a = load <4 x i32>, <4 x i32> addrspace(1)* %in
  %b = load <4 x i32>, <4 x i32> addrspace(1)* %b_ptr
  %result = add <4 x i32> %a, %b
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v8i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, <8 x i32> %b) {
entry:
  %0 = add <8 x i32> %a, %b
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}s_add_v16i32:
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
; GCN: s_add_i32
define amdgpu_kernel void @s_add_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, <16 x i32> %b) {
entry:
  %0 = add <16 x i32> %a, %b
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_add_i32:
; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[A:v[0-9]+]]
; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[B:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, [[A]], [[B]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
; GFX10: v_add_nc_u32_e32 v{{[0-9]+}}, [[A]], [[B]]
define amdgpu_kernel void @v_add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %b = load volatile i32, i32 addrspace(1)* %b_ptr
  %result = add i32 %a, %b
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}v_add_imm_i32:
; GCN: {{buffer|flat|global}}_load_{{dword|b32}} [[A:v[0-9]+]]
; SIVI: v_add_{{i|u}}32_e32 v{{[0-9]+}}, vcc, 0x7b, [[A]]
; GFX9: v_add_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
; GFX10: v_add_nc_u32_e32 v{{[0-9]+}}, 0x7b, [[A]]
define amdgpu_kernel void @v_add_imm_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %tid
  %b_ptr = getelementptr i32, i32 addrspace(1)* %gep, i32 1
  %a = load volatile i32, i32 addrspace(1)* %gep
  %result = add i32 %a, 123
  store i32 %result, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}add64:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
entry:
  %add = add i64 %a, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; The v_addc_u32 and v_add_i32 instructions can't read SGPRs, because they
; use VCC. The test is designed so that %a will be stored in an SGPR and
; %0 will be stored in a VGPR, so the compiler will be forced to copy %a
; to a VGPR before doing the add.

; FUNC-LABEL: {{^}}add64_sgpr_vgpr:
; GCN-NOT: v_addc_u32_e32 s
define amdgpu_kernel void @add64_sgpr_vgpr(i64 addrspace(1)* %out, i64 %a, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  %1 = add i64 %a, %0
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Test an i64 add inside a branch.
; FUNC-LABEL: {{^}}add64_in_branch:
; GCN: s_add_u32
; GCN: s_addc_u32
define amdgpu_kernel void @add64_in_branch(i64 addrspace(1)* %out, i64 addrspace(1)* %in, i64 %a, i64 %b, i64 %c) {
entry:
  %0 = icmp eq i64 %a, 0
  br i1 %0, label %if, label %else

if:
  %1 = load i64, i64 addrspace(1)* %in
  br label %endif

else:
  %2 = add i64 %a, %b
  br label %endif

endif:
  %3 = phi i64 [%1, %if], [%2, %else]
  store i64 %3, i64 addrspace(1)* %out
  ret void
}

; Make sure the VOP3 form of the add is initially selected. Otherwise a
; pair of copies from/to VCC would be necessary.

; GCN-LABEL: {{^}}add_select_vop3:
; SIVI: v_add_{{i|u}}32_e64 v0, s[0:1], s0, v0
; GFX9: v_add_u32_e32 v0, s0, v0
; GFX10: v_add_nc_u32_e32 v0, s0, v0

; GCN: ; def vcc
; GCN: ds_{{write|store}}_b32
; GCN: ; use vcc
define amdgpu_ps void @add_select_vop3(i32 inreg %s, i32 %v) {
  %vcc = call i64 asm sideeffect "; def vcc", "={vcc}"()
  %sub = add i32 %v, %s
  store i32 %sub, i32 addrspace(3)* undef
  call void asm sideeffect "; use vcc", "{vcc}"(i64 %vcc)
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone speculatable }