; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s

; Codegen checks for the llvm.amdgcn.permlane16 and llvm.amdgcn.permlanex16
; intrinsics, compiled once for gfx1010 and once for gfx1100.
;
; Every call below passes six operands. In this file they are referred to, in
; order, as: old (captured as OLD in the checks), src0, src1, src2, and two i1
; flags that the test names call "fi" and "bc".
;
; Test-name suffixes describe where src0/src1/src2 come from:
;   v = a VGPR value (src0 kernel argument, or a workitem id for src1/src2)
;   s = kernel argument, expected in an SGPR
;   i = the small immediates 1 and 2
;   l = the larger constants 4660 (0x1234) and 49617 (0xc1d1)
; The later tid / undef / i tests vary the first ("old") operand instead.

; The intrinsic declarations use attribute group #0 (nounwind readnone
; convergent); the kernels themselves use #1 (nounwind).
declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #0
declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #0
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y()

; src1 and src2 are kernel arguments: both must be used as SGPRs directly,
; with no v_readfirstlane_b32 emitted.
; GCN-LABEL: {{^}}v_permlane16_b32_vss:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlane16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 and src2 are the constants 1 and 2: both must appear as immediates in
; the instruction.
; GCN-LABEL: {{^}}v_permlane16_b32_vii:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 {
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 and src2 are the constants 0x1234 and 0xc1d1: the checks currently
; expect each to be moved into an SGPR first (see the FIXME note below).
; GCN-LABEL: {{^}}v_permlane16_b32_vll:
; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 and src2 are workitem ids x and y: each must be moved to an SGPR with
; v_readfirstlane_b32. The gfx1010 run reads v0 and v1 directly; the gfx1100
; run expects a v_and_b32 / v_bfe_u32 result to be read instead.
; GCN-LABEL: {{^}}v_permlane16_b32_vvv:
; GFX10-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
; GFX10-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
; GFX11-DAG: v_and_b32_e32 [[VSRC1:v[0-9]+]],
; GFX11-DAG: v_bfe_u32 [[VSRC2:v[0-9]+]],
; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]]
; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]]
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
define amdgpu_kernel void @v_permlane16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %tidy = call i32 @llvm.amdgcn.workitem.id.y()
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 is workitem id x, src2 is a kernel argument: exactly one
; v_readfirstlane_b32 (of v0) is expected before the permlane.
; GCN-LABEL: {{^}}v_permlane16_b32_vvs:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlane16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 is a kernel argument, src2 is workitem id y: exactly one
; v_readfirstlane_b32 is expected, feeding the last operand.
; GCN-LABEL: {{^}}v_permlane16_b32_vsv:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}}
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
define amdgpu_kernel void @v_permlane16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
  %tidy = call i32 @llvm.amdgcn.workitem.id.y()
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Fifth operand ("fi") set: must show up as the first op_sel bit. Either the
; two-element or the four-element op_sel spelling is accepted.
; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
define amdgpu_kernel void @v_permlane16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Sixth operand ("bc") set: must show up as the second op_sel bit.
; GCN-LABEL: {{^}}v_permlane16_b32_vss_bc:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
define amdgpu_kernel void @v_permlane16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Both flag operands set: both leading op_sel bits must be 1.
; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi_bc:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
define amdgpu_kernel void @v_permlane16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; The tests below repeat the operand-source matrix above for permlanex16.

; src1 and src2 are kernel arguments: used as SGPRs directly.
; GCN-LABEL: {{^}}v_permlanex16_b32_vss:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 and src2 are the constants 1 and 2: emitted as immediates.
; GCN-LABEL: {{^}}v_permlanex16_b32_vii:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 {
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 1, i32 2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 and src2 are the constants 0x1234 and 0xc1d1: currently expected to be
; moved into SGPRs first (see the FIXME note below).
; GCN-LABEL: {{^}}v_permlanex16_b32_vll:
; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 and src2 are workitem ids x and y: each goes through
; v_readfirstlane_b32, with per-target expectations for the VGPR that is read.
; GCN-LABEL: {{^}}v_permlanex16_b32_vvv:
; GFX10-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
; GFX10-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
; GFX11-DAG: v_and_b32_e32 [[VSRC1:v[0-9]+]],
; GFX11-DAG: v_bfe_u32 [[VSRC2:v[0-9]+]],
; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]]
; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]]
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %tidy = call i32 @llvm.amdgcn.workitem.id.y()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %tidy, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 is workitem id x, src2 is a kernel argument: exactly one
; v_readfirstlane_b32 (of v0) is expected.
; GCN-LABEL: {{^}}v_permlanex16_b32_vvs:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %tidx, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; src1 is a kernel argument, src2 is workitem id y: exactly one
; v_readfirstlane_b32 is expected, feeding the last operand.
; GCN-LABEL: {{^}}v_permlanex16_b32_vsv:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}}
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
  %tidy = call i32 @llvm.amdgcn.workitem.id.y()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %tidy, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Fifth operand ("fi") set: must show up as the first op_sel bit.
; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Sixth operand ("bc") set: must show up as the second op_sel bit.
; GCN-LABEL: {{^}}v_permlanex16_b32_vss_bc:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 0, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Both flag operands set: both leading op_sel bits must be 1.
; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi_bc:
; GFX10PLUS-NOT: v_readfirstlane_b32
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 1, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; The remaining tests vary the first ("old") operand while src0 is always
; workitem id x (expected in v0).

; old and src0 are the same workitem id: the instruction must use v0 for both
; its destination and its first source.
; GCN-LABEL: {{^}}v_permlane16_b32_tid_tid:
; GFX10PLUS: v_permlane16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlane16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; old is undef: any destination VGPR is acceptable.
; GCN-LABEL: {{^}}v_permlane16_b32_undef_tid:
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlane16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; old is the constant 12345 (0x3039): it must be moved into the destination
; register before the permlane executes.
; GCN-LABEL: {{^}}v_permlane16_b32_i_tid:
; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039
; GFX10PLUS: v_permlane16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlane16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Same constant old value with the "fi" flag set: the checks require that
; 0x3039 is not materialized at all.
; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi:
; GFX10PLUS-NOT: 0x3039
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
define amdgpu_kernel void @v_permlane16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Same constant old value with the "bc" flag set: 0x3039 must not be
; materialized.
; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_bc:
; GFX10PLUS-NOT: 0x3039
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
define amdgpu_kernel void @v_permlane16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Same constant old value with both flags set: 0x3039 must not be
; materialized.
; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi_bc:
; GFX10PLUS-NOT: 0x3039
; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
define amdgpu_kernel void @v_permlane16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; permlanex16 variants of the "old operand" tests above.

; old and src0 are the same workitem id: v0 is both destination and source.
; GCN-LABEL: {{^}}v_permlanex16_b32_tid_tid:
; GFX10PLUS: v_permlanex16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlanex16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 %tidx, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; old is undef: any destination VGPR is acceptable.
; GCN-LABEL: {{^}}v_permlanex16_b32_undef_tid:
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlanex16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; old is the constant 12345 (0x3039): it must be moved into the destination
; register before the permlane executes.
; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid:
; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039
; GFX10PLUS: v_permlanex16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
define amdgpu_kernel void @v_permlanex16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Same constant old value with the "fi" flag set: 0x3039 must not be
; materialized.
; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi:
; GFX10PLUS-NOT: 0x3039
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 0)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Same constant old value with the "bc" flag set: 0x3039 must not be
; materialized.
; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_bc:
; GFX10PLUS-NOT: 0x3039
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 0, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; Same constant old value with both flags set: 0x3039 must not be
; materialized.
; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi_bc:
; GFX10PLUS-NOT: 0x3039
; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %tidx, i32 %src1, i32 %src2, i1 1, i1 1)
  store i32 %v, i32 addrspace(1)* %out
  ret void
}

; #0 is applied to the permlane intrinsic declarations, #1 to every kernel.
attributes #0 = { nounwind readnone convergent }
attributes #1 = { nounwind }