;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=GCN %s
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefixes=GCN,VI %s

; Codegen checks for the llvm.amdgcn.interp.p1 / .p2 / .mov intrinsics.
;
; As exercised by the expectations in this file, the trailing i32 operands of
; each intrinsic are (attr_chan, attr, m0):
;   * attr_chan selects the .x/.y/.z/.w suffix (0..3); the out-of-range value 4
;     is expected to print as .x,
;   * attr selects the N in attrN (values up to 64 are expected verbatim),
;   * the last operand is the value expected to be moved into m0.
; For interp.mov the leading i32 selects the parameter name: 0 is expected to
; print as p10, 1 as p20, 2 as p0, and any other value K as invalid_param_K.

; Basic use of all three intrinsics with a non-constant m0 taken from an SGPR
; argument. %p1_0 is unused, so no p2 instruction for attr0.x is expected.
; GCN-LABEL: {{^}}v_interp:
; GCN-NOT: s_wqm
; GCN: s_mov_b32 m0, s{{[0-9]+}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
define amdgpu_ps void @v_interp(<16 x i8> addrspace(2)* inreg, <16 x i8> addrspace(2)* inreg, <32 x i8> addrspace(2)* inreg, i32 inreg, <2 x float>) {
main_body:
  %i = extractelement <2 x float> %4, i32 0
  %j = extractelement <2 x float> %4, i32 1
  %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 %3)
  %p1_0 = call float @llvm.amdgcn.interp.p2(float %p0_0, float %j, i32 0, i32 0, i32 %3)
  %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 %3)
  %p1_1 = call float @llvm.amdgcn.interp.p2(float %p0_1, float %j, i32 1, i32 0, i32 %3)
  %const = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 %3)
  %w = fadd float %p1_1, %const
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %p0_0, float %p0_0, float %p1_1, float %w)
  ret void
}

; Sweep of channel and attribute operands for interp.p1 with constant m0 = 256.
; Each result is kept alive by a volatile store.
; GCN-LABEL: {{^}}v_interp_p1:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.z{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}

; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr1.x{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr2.y{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr3.z{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr4.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.w{{$}}
; GCN-DAG: v_interp_p1_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p1(float %i) {
  %p0_0 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 0, i32 256)
  %p0_1 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 0, i32 256)
  %p0_2 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 0, i32 256)
  %p0_3 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 0, i32 256)
  %p0_4 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 0, i32 256)
  %p0_5 = call float @llvm.amdgcn.interp.p1(float %i, i32 0, i32 1, i32 256)
  %p0_6 = call float @llvm.amdgcn.interp.p1(float %i, i32 1, i32 2, i32 256)
  %p0_7 = call float @llvm.amdgcn.interp.p1(float %i, i32 2, i32 3, i32 256)
  %p0_8 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 4, i32 256)
  %p0_9 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 63, i32 256)
  %p0_10 = call float @llvm.amdgcn.interp.p1(float %i, i32 3, i32 64, i32 256)
  %p0_11 = call float @llvm.amdgcn.interp.p1(float %i, i32 4, i32 64, i32 256)

  store volatile float %p0_0, float addrspace(1)* undef
  store volatile float %p0_1, float addrspace(1)* undef
  store volatile float %p0_2, float addrspace(1)* undef
  store volatile float %p0_3, float addrspace(1)* undef
  store volatile float %p0_4, float addrspace(1)* undef
  store volatile float %p0_5, float addrspace(1)* undef
  store volatile float %p0_6, float addrspace(1)* undef
  store volatile float %p0_7, float addrspace(1)* undef
  store volatile float %p0_8, float addrspace(1)* undef
  store volatile float %p0_9, float addrspace(1)* undef
  store volatile float %p0_10, float addrspace(1)* undef
  store volatile float %p0_11, float addrspace(1)* undef
  ret void
}

; Sweep of channel and attribute operands for interp.p2 with constant m0 = 256.
; The expectation for %p2_5 (attr_chan 0, attr 1) is attr1.x; it previously
; duplicated attr0.x, which left that instruction unchecked and gave three
; identical DAG patterns for only two attr0.x instructions (%p2_0 and %p2_4).
; GCN-LABEL: {{^}}v_interp_p2:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.y{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.z{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.w{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr0.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr1.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr63.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
; GCN-DAG: v_interp_p2_f32 v{{[0-9]+}}, v{{[0-9]+}}, attr64.x{{$}}
define amdgpu_ps void @v_interp_p2(float %x, float %j) {
  %p2_0 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 0, i32 256)
  %p2_1 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 1, i32 0, i32 256)
  %p2_2 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 2, i32 0, i32 256)
  %p2_3 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 3, i32 0, i32 256)
  %p2_4 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 0, i32 256)

  %p2_5 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 1, i32 256)
  %p2_6 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 63, i32 256)
  %p2_7 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 0, i32 64, i32 256)
  %p2_8 = call float @llvm.amdgcn.interp.p2(float %x, float %j, i32 4, i32 64, i32 256)

  store volatile float %p2_0, float addrspace(1)* undef
  store volatile float %p2_1, float addrspace(1)* undef
  store volatile float %p2_2, float addrspace(1)* undef
  store volatile float %p2_3, float addrspace(1)* undef
  store volatile float %p2_4, float addrspace(1)* undef
  store volatile float %p2_5, float addrspace(1)* undef
  store volatile float %p2_6, float addrspace(1)* undef
  store volatile float %p2_7, float addrspace(1)* undef
  store volatile float %p2_8, float addrspace(1)* undef
  ret void
}

; Sweep of parameter, channel and attribute operands for interp.mov with
; constant m0 = 256.
; The expectation for %mov_4 (param 0, attr_chan 1, attr 0) is "p10, attr0.y";
; it previously duplicated "p10, attr0.x", which left that instruction
; unchecked and gave three identical DAG patterns for only two such
; instructions (%mov_0 and %mov_7).
; GCN-LABEL: {{^}}v_interp_mov:
; GCN: s_movk_i32 m0, 0x100
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p20, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr0.x{{$}}

; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.z{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.w{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr0.x{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_8, attr0.x{{$}}

; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr63.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, p10, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_3, attr64.y{{$}}
; GCN-DAG: v_interp_mov_f32 v{{[0-9]+}}, invalid_param_10, attr64.x{{$}}
define amdgpu_ps void @v_interp_mov(float %x, float %j) {
  %mov_0 = call float @llvm.amdgcn.interp.mov(i32 0, i32 0, i32 0, i32 256)
  %mov_1 = call float @llvm.amdgcn.interp.mov(i32 1, i32 0, i32 0, i32 256)
  %mov_2 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 256)
  %mov_3 = call float @llvm.amdgcn.interp.mov(i32 3, i32 0, i32 0, i32 256)

  %mov_4 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 0, i32 256)
  %mov_5 = call float @llvm.amdgcn.interp.mov(i32 0, i32 2, i32 0, i32 256)
  %mov_6 = call float @llvm.amdgcn.interp.mov(i32 0, i32 3, i32 0, i32 256)
  %mov_7 = call float @llvm.amdgcn.interp.mov(i32 0, i32 4, i32 0, i32 256)
  %mov_8 = call float @llvm.amdgcn.interp.mov(i32 8, i32 4, i32 0, i32 256)

  %mov_9 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 63, i32 256)
  %mov_10 = call float @llvm.amdgcn.interp.mov(i32 0, i32 1, i32 64, i32 256)
  %mov_11 = call float @llvm.amdgcn.interp.mov(i32 3, i32 1, i32 64, i32 256)
  %mov_12 = call float @llvm.amdgcn.interp.mov(i32 10, i32 4, i32 64, i32 256)

  store volatile float %mov_0, float addrspace(1)* undef
  store volatile float %mov_1, float addrspace(1)* undef
  store volatile float %mov_2, float addrspace(1)* undef
  store volatile float %mov_3, float addrspace(1)* undef

  store volatile float %mov_4, float addrspace(1)* undef
  store volatile float %mov_5, float addrspace(1)* undef
  store volatile float %mov_6, float addrspace(1)* undef
  store volatile float %mov_7, float addrspace(1)* undef
  store volatile float %mov_8, float addrspace(1)* undef

  store volatile float %mov_9, float addrspace(1)* undef
  store volatile float %mov_10, float addrspace(1)* undef
  store volatile float %mov_11, float addrspace(1)* undef
  store volatile float %mov_12, float addrspace(1)* undef
  ret void
}

; The two LDS stores around the readnone interp.mov call are expected to be
; merged into a single ds_write2_b32, with m0 set to 0 for the interpolation
; and to -1 before the LDS write.
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
; VI-LABEL: {{^}}v_interp_readnone:
; VI: s_mov_b32 m0, 0
; VI-DAG: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI-DAG: v_interp_mov_f32 v{{[0-9]+}}, p0, attr0.x{{$}}
; VI: s_mov_b32 m0, -1{{$}}
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps void @v_interp_readnone(float addrspace(3)* %lds) {
  store float 0.0, float addrspace(3)* %lds
  %tmp1 = call float @llvm.amdgcn.interp.mov(i32 2, i32 0, i32 0, i32 0)
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.0, float addrspace(3)* %tmp2
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %tmp1, float %tmp1, float %tmp1, float %tmp1)
  ret void
}

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

; Function Attrs: nounwind readnone
declare float @llvm.amdgcn.interp.mov(i32, i32, i32, i32) #0

declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)

attributes #0 = { nounwind readnone }