1; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX10 %s
2; RUN: llc -amdgpu-load-store-vectorizer=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10PLUS,GFX11 %s
3
; Cross-lane permute intrinsics exercised by the kernels below. They carry
; attribute group #0 (nounwind readnone convergent), which this file defines
; at the bottom and which is otherwise unreferenced; group #1 (nounwind) is
; the set applied to the kernels themselves.
declare i32 @llvm.amdgcn.permlane16(i32, i32, i32, i32, i1, i1) #0
declare i32 @llvm.amdgcn.permlanex16(i32, i32, i32, i32, i1, i1) #0
; Work-item id intrinsics (x and y components), called by some kernels to
; obtain operand values that are not kernel arguments.
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y()
8
9; GCN-LABEL: {{^}}v_permlane16_b32_vss:
10; GFX10PLUS-NOT: v_readfirstlane_b32
11; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlane16 with every variable operand taken from a kernel argument
; (%src0 twice, then %src1 and %src2) and both trailing i1 flags clear.
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
17
18; GCN-LABEL: {{^}}v_permlane16_b32_vii:
19; GFX10PLUS-NOT: v_readfirstlane_b32
20; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
; permlane16 whose third and fourth operands are the small constants 1 and 2;
; the first two operands are both %src0 and both i1 flags are clear.
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 {
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 1, i32 2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
26
27; GCN-LABEL: {{^}}v_permlane16_b32_vll:
28; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
29; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
30; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
31; GFX10PLUS-NOT: v_readfirstlane_b32
32; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
; permlane16 whose third and fourth operands are the constants 4660 (0x1234)
; and 49617 (0xc1d1); the first two operands are both %src0 and both i1 flags
; are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
38
39; GCN-LABEL: {{^}}v_permlane16_b32_vvv:
40; GFX10-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
41; GFX10-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
42; GFX11-DAG: v_and_b32_e32 [[VSRC1:v[0-9]+]],
43; GFX11-DAG: v_bfe_u32 [[VSRC2:v[0-9]+]],
44; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]]
45; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]]
46; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
; permlane16 whose third and fourth operands are the work-item ids in x and y
; rather than kernel arguments; the first two operands are both %src0 and both
; i1 flags are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %wi.y = call i32 @llvm.amdgcn.workitem.id.y()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %wi.x, i32 %wi.y, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
54
55; GCN-LABEL: {{^}}v_permlane16_b32_vvs:
56; GFX10PLUS-NOT: v_readfirstlane_b32
57; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
58; GFX10PLUS-NOT: v_readfirstlane_b32
59; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
; permlane16 whose third operand is the work-item id in x while the fourth is
; the kernel argument %src2; the first two operands are both %src0 and both
; i1 flags are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %wi.x, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
66
67; GCN-LABEL: {{^}}v_permlane16_b32_vsv:
68; GFX10PLUS-NOT: v_readfirstlane_b32
69; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}}
70; GFX10PLUS-NOT: v_readfirstlane_b32
71; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
; permlane16 whose third operand is the kernel argument %src1 while the fourth
; is the work-item id in y; the first two operands are both %src0 and both
; i1 flags are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
  %wi.y = call i32 @llvm.amdgcn.workitem.id.y()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %wi.y, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
78
79; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi:
80; GFX10PLUS-NOT: v_readfirstlane_b32
81; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
; permlane16 with all variable operands from kernel arguments and only the
; first of the two trailing i1 flags set (the "fi" case in this test's name).
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
87
88; GCN-LABEL: {{^}}v_permlane16_b32_vss_bc:
89; GFX10PLUS-NOT: v_readfirstlane_b32
90; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
; permlane16 with all variable operands from kernel arguments and only the
; second of the two trailing i1 flags set (the "bc" case in this test's name).
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
96
97; GCN-LABEL: {{^}}v_permlane16_b32_vss_fi_bc:
98; GFX10PLUS-NOT: v_readfirstlane_b32
99; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
; permlane16 with all variable operands from kernel arguments and both
; trailing i1 flags set (the combined "fi" + "bc" case in this test's name).
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
105
106; GCN-LABEL: {{^}}v_permlanex16_b32_vss:
107; GFX10PLUS-NOT: v_readfirstlane_b32
108; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlanex16 with every variable operand taken from a kernel argument
; (%src0 twice, then %src1 and %src2) and both trailing i1 flags clear.
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vss(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
114
115; GCN-LABEL: {{^}}v_permlanex16_b32_vii:
116; GFX10PLUS-NOT: v_readfirstlane_b32
117; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, 1, 2{{$}}
; permlanex16 whose third and fourth operands are the small constants 1 and 2;
; the first two operands are both %src0 and both i1 flags are clear.
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vii(i32 addrspace(1)* %out, i32 %src0) #1 {
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 1, i32 2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
123
124; GCN-LABEL: {{^}}v_permlanex16_b32_vll:
125; FIXME-GFX10PLUS: It is allowed to have both immediates as literals
126; GFX10PLUS-DAG: s_movk_i32 [[SRC1:s[0-9]+]], 0x1234
127; GFX10PLUS-DAG: s_mov_b32 [[SRC2:s[0-9]+]], 0xc1d1
128; GFX10PLUS-NOT: v_readfirstlane_b32
129; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
; permlanex16 whose third and fourth operands are the constants 4660 (0x1234)
; and 49617 (0xc1d1); the first two operands are both %src0 and both i1 flags
; are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vll(i32 addrspace(1)* %out, i32 %src0) #1 {
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 4660, i32 49617, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
135
136; GCN-LABEL: {{^}}v_permlanex16_b32_vvv:
137; GFX10-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
138; GFX10-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v1
139; GFX11-DAG: v_and_b32_e32 [[VSRC1:v[0-9]+]],
140; GFX11-DAG: v_bfe_u32 [[VSRC2:v[0-9]+]],
141; GFX11-DAG: v_readfirstlane_b32 [[SRC1:s[0-9]+]], [[VSRC1]]
142; GFX11-DAG: v_readfirstlane_b32 [[SRC2:s[0-9]+]], [[VSRC2]]
143; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], [[SRC2]]{{$}}
; permlanex16 whose third and fourth operands are the work-item ids in x and y
; rather than kernel arguments; the first two operands are both %src0 and both
; i1 flags are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vvv(i32 addrspace(1)* %out, i32 %src0) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %wi.y = call i32 @llvm.amdgcn.workitem.id.y()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %wi.x, i32 %wi.y, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
151
152; GCN-LABEL: {{^}}v_permlanex16_b32_vvs:
153; GFX10PLUS-NOT: v_readfirstlane_b32
154; GFX10PLUS: v_readfirstlane_b32 [[SRC1:s[0-9]+]], v0
155; GFX10PLUS-NOT: v_readfirstlane_b32
156; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, [[SRC1]], s{{[0-9]+}}{{$}}
; permlanex16 whose third operand is the work-item id in x while the fourth is
; the kernel argument %src2; the first two operands are both %src0 and both
; i1 flags are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vvs(i32 addrspace(1)* %out, i32 %src0, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %wi.x, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
163
164; GCN-LABEL: {{^}}v_permlanex16_b32_vsv:
165; GFX10PLUS-NOT: v_readfirstlane_b32
166; GFX10PLUS: v_readfirstlane_b32 [[SRC2:s[0-9]+]], v{{[0-9]+}}
167; GFX10PLUS-NOT: v_readfirstlane_b32
168; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, [[SRC2]]{{$}}
; permlanex16 whose third operand is the kernel argument %src1 while the
; fourth is the work-item id in y; the first two operands are both %src0 and
; both i1 flags are clear. The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vsv(i32 addrspace(1)* %out, i32 %src0, i32 %src1) #1 {
  %wi.y = call i32 @llvm.amdgcn.workitem.id.y()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %wi.y, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
175
176; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi:
177; GFX10PLUS-NOT: v_readfirstlane_b32
178; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
; permlanex16 with all variable operands from kernel arguments and only the
; first of the two trailing i1 flags set (the "fi" case in this test's name).
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vss_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
184
185; GCN-LABEL: {{^}}v_permlanex16_b32_vss_bc:
186; GFX10PLUS-NOT: v_readfirstlane_b32
187; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
; permlanex16 with all variable operands from kernel arguments and only the
; second of the two trailing i1 flags set (the "bc" case in this test's name).
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vss_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 false, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
193
194; GCN-LABEL: {{^}}v_permlanex16_b32_vss_fi_bc:
195; GFX10PLUS-NOT: v_readfirstlane_b32
196; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v{{[0-9]+}}, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
; permlanex16 with all variable operands from kernel arguments and both
; trailing i1 flags set (the combined "fi" + "bc" case in this test's name).
; The intrinsic result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_vss_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %src0, i32 %src0, i32 %src1, i32 %src2, i1 true, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
202
203; GCN-LABEL: {{^}}v_permlane16_b32_tid_tid:
204; GFX10PLUS: v_permlane16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlane16 with the work-item id in x supplied as both the first and the
; second operand; the third and fourth operands are %src1 and %src2 and both
; i1 flags are clear. %src0 is unused. The result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 %wi.x, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
211
212; GCN-LABEL: {{^}}v_permlane16_b32_undef_tid:
213; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlane16 with undef as the first operand and the work-item id in x as the
; second; the third and fourth operands are %src1 and %src2 and both i1 flags
; are clear. %src0 is unused. The result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 undef, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
220
221; GCN-LABEL: {{^}}v_permlane16_b32_i_tid:
222; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039
223; GFX10PLUS: v_permlane16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlane16 with the constant 12345 (0x3039) as the first operand and the
; work-item id in x as the second; the third and fourth operands are %src1 and
; %src2 and both i1 flags are clear. The result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
230
231; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi:
232; GFX10PLUS-NOT: 0x3039
233; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
; Same operands as the constant-first-operand case (12345, work-item id in x,
; %src1, %src2), but with the first trailing i1 flag set and the second clear.
; The result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 true, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
240
241; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_bc:
242; GFX10PLUS-NOT: 0x3039
243; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
; Same operands as the constant-first-operand case (12345, work-item id in x,
; %src1, %src2), but with the second trailing i1 flag set and the first clear.
; The result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
250
251; GCN-LABEL: {{^}}v_permlane16_b32_i_tid_fi_bc:
252; GFX10PLUS-NOT: 0x3039
253; GFX10PLUS: v_permlane16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
; Same operands as the constant-first-operand case (12345, work-item id in x,
; %src1, %src2), but with both trailing i1 flags set.
; The result is written through %out.
define amdgpu_kernel void @v_permlane16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlane16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 true, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
260
261; GCN-LABEL: {{^}}v_permlanex16_b32_tid_tid:
262; GFX10PLUS: v_permlanex16_b32 v0, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlanex16 with the work-item id in x supplied as both the first and the
; second operand; the third and fourth operands are %src1 and %src2 and both
; i1 flags are clear. %src0 is unused. The result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_tid_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 %wi.x, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
269
270; GCN-LABEL: {{^}}v_permlanex16_b32_undef_tid:
271; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlanex16 with undef as the first operand and the work-item id in x as the
; second; the third and fourth operands are %src1 and %src2 and both i1 flags
; are clear. %src0 is unused. The result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_undef_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 undef, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
278
279; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid:
280; GFX10PLUS: v_{{(dual_)?}}mov_b32{{(_e32)?}} [[OLD:v[0-9]+]], 0x3039
281; GFX10PLUS: v_permlanex16_b32 [[OLD]], v0, s{{[0-9]+}}, s{{[0-9]+}}{{$}}
; permlanex16 with the constant 12345 (0x3039) as the first operand and the
; work-item id in x as the second; the third and fourth operands are %src1 and
; %src2 and both i1 flags are clear. The result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_i_tid(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
288
289; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi:
290; GFX10PLUS-NOT: 0x3039
291; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,0|1,0,0,1}}]{{$}}
; Same operands as the constant-first-operand case (12345, work-item id in x,
; %src1, %src2), but with the first trailing i1 flag set and the second clear.
; The result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 true, i1 false)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
298
299; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_bc:
300; GFX10PLUS-NOT: 0x3039
301; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{0,1|0,1,0,0}}]{{$}}
; Same operands as the constant-first-operand case (12345, work-item id in x,
; %src1, %src2), but with the second trailing i1 flag set and the first clear.
; The result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_i_tid_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 false, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
308
309; GCN-LABEL: {{^}}v_permlanex16_b32_i_tid_fi_bc:
310; GFX10PLUS-NOT: 0x3039
311; GFX10PLUS: v_permlanex16_b32 v{{[0-9]+}}, v0, s{{[0-9]+}}, s{{[0-9]+}} op_sel:[{{1,1|1,1,0,1}}]{{$}}
; Same operands as the constant-first-operand case (12345, work-item id in x,
; %src1, %src2), but with both trailing i1 flags set.
; The result is written through %out.
define amdgpu_kernel void @v_permlanex16_b32_i_tid_fi_bc(i32 addrspace(1)* %out, i32 %src0, i32 %src1, i32 %src2) #1 {
  %wi.x = call i32 @llvm.amdgcn.workitem.id.x()
  %permuted = call i32 @llvm.amdgcn.permlanex16(i32 12345, i32 %wi.x, i32 %src1, i32 %src2, i1 true, i1 true)
  store i32 %permuted, i32 addrspace(1)* %out, align 4
  ret void
}
318
319attributes #0 = { nounwind readnone convergent }
320attributes #1 = { nounwind }
321