1; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
2
; Test case: (load << 8) | (%arg1 & 0xff).
; In the stored word, bytes 3..1 are bytes 2..0 of the loaded value and byte 0
; is byte 0 of %arg1. The check lines below expect a v_perm_b32 whose last
; operand is a v register initialized with the mask 0x6050400.
3; GCN-LABEL: {{^}}lsh8_or_and:
4; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x6050400
5; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
6define amdgpu_kernel void @lsh8_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
7bb:
8  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
9  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
10  %tmp = load i32, i32 addrspace(1)* %gep, align 4
11  %tmp2 = shl i32 %tmp, 8 ; loaded bytes 2..0 move up to bytes 3..1, byte 0 becomes zero
12  %tmp3 = and i32 %arg1, 255 ; 0x000000ff: keep only byte 0 of %arg1
13  %tmp4 = or i32 %tmp2, %tmp3 ; the two operands have no overlapping bits
14  store i32 %tmp4, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
15  ret void
16}
17
; Test case: (load >> 24) | (%arg1 & 0xffffff00).
; In the stored word, byte 0 is byte 3 of the loaded value and bytes 3..1 are
; bytes 3..1 of %arg1. The check lines below expect a v_perm_b32 whose last
; operand is a v register initialized with the mask 0x7060503.
18; GCN-LABEL: {{^}}lsr24_or_and:
19; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
20; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
21define amdgpu_kernel void @lsr24_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
22bb:
23  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
24  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
25  %tmp = load i32, i32 addrspace(1)* %gep, align 4
26  %tmp2 = lshr i32 %tmp, 24 ; loaded byte 3 moves down to byte 0, bytes 3..1 become zero
27  %tmp3 = and i32 %arg1, 4294967040 ; 0xffffff00
28  %tmp4 = or i32 %tmp2, %tmp3 ; the two operands have no overlapping bits
29  store i32 %tmp4, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
30  ret void
31}
32
; Test case: ((load & 0xffffff00) | (%arg1 >> 24)) ^ 0x80000000.
; Before the xor, bytes 3..1 are bytes 3..1 of the loaded value and byte 0 is
; byte 3 of %arg1. The check lines below expect a v_perm_b32 with the mask
; 0x7060503, i.e. the same mask value as in lsr24_or_and.
; NOTE(review): the reason for the extra xor of bit 31 is not stated in this
; file -- confirm against the change that introduced it.
33; GCN-LABEL: {{^}}and_or_lsr24:
34; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7060503
35; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
36define amdgpu_kernel void @and_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
37bb:
38  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
39  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
40  %tmp = load i32, i32 addrspace(1)* %gep, align 4
41  %tmp2 = and i32 %tmp, 4294967040 ; 0xffffff00
42  %tmp3 = lshr i32 %arg1, 24 ; byte 3 of %arg1 moves down to byte 0
43  %tmp4 = or i32 %tmp2, %tmp3 ; the two operands have no overlapping bits
44  %tmp5 = xor i32 %tmp4, -2147483648 ; 0x80000000: flips bit 31 of the combined value
45  store i32 %tmp5, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
46  ret void
47}
48
; Test case: (load & 0xff00ff00) | (%arg1 & 0x00ff00ff).
; In the stored word, bytes 3 and 1 come from the loaded value and bytes 2 and
; 0 come from %arg1. The check lines below expect a v_perm_b32 whose last
; operand is a v register initialized with the mask 0x7020500.
49; GCN-LABEL: {{^}}and_or_and:
50; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020500
51; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
52define amdgpu_kernel void @and_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
53bb:
54  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
55  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
56  %tmp = load i32, i32 addrspace(1)* %gep, align 4
57  %tmp2 = and i32 %tmp, -16711936 ; 0xff00ff00
58  %tmp3 = and i32 %arg1, 16711935 ; 0x00ff00ff
59  %tmp4 = or i32 %tmp2, %tmp3 ; the two masks are complementary, so no bits overlap
60  store i32 %tmp4, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
61  ret void
62}
63
; Test case: (load << 8) | (%arg1 >> 24).
; In the stored word, bytes 3..1 are bytes 2..0 of the loaded value and byte 0
; is byte 3 of %arg1. Unlike the neighbouring cases, the check line below
; expects a v_alignbit_b32 with a final operand of 24; no v_perm_b32 is
; checked for here.
64; GCN-LABEL: {{^}}lsh8_or_lsr24:
65; GCN: v_alignbit_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, 24
66define amdgpu_kernel void @lsh8_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
67bb:
68  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
69  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
70  %tmp = load i32, i32 addrspace(1)* %gep, align 4
71  %tmp2 = shl i32 %tmp, 8 ; loaded bytes 2..0 move up to bytes 3..1
72  %tmp3 = lshr i32 %arg1, 24 ; byte 3 of %arg1 moves down to byte 0
73  %tmp4 = or i32 %tmp2, %tmp3 ; the two operands have no overlapping bits
74  store i32 %tmp4, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
75  ret void
76}
77
; Test case: (load << 16) | (%arg1 >> 24).
; In the stored word, bytes 3..2 are bytes 1..0 of the loaded value, byte 1 is
; zero and byte 0 is byte 3 of %arg1. The check lines below expect a
; v_perm_b32 whose last operand is a v register initialized with the mask
; 0x5040c03.
78; GCN-LABEL: {{^}}lsh16_or_lsr24:
79; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x5040c03
80; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
81define amdgpu_kernel void @lsh16_or_lsr24(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
82bb:
83  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
84  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
85  %tmp = load i32, i32 addrspace(1)* %gep, align 4
86  %tmp2 = shl i32 %tmp, 16 ; loaded bytes 1..0 move up to bytes 3..2, bytes 1..0 become zero
87  %tmp3 = lshr i32 %arg1, 24 ; byte 3 of %arg1 moves down to byte 0
88  %tmp4 = or i32 %tmp2, %tmp3 ; neither operand sets byte 1, so it stays zero
89  store i32 %tmp4, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
90  ret void
91}
92
; Test case: (load & 0xff0000ff) ^ (%arg1 & 0x00ffff00).
; The two masks are complementary, so the xor produces the same bits an or
; would: bytes 3 and 0 come from the loaded value, bytes 2..1 from %arg1.
; The check lines below expect a v_perm_b32 whose last operand is a v register
; initialized with the mask 0x7020104.
93; GCN-LABEL: {{^}}and_xor_and:
94; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
95; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
96define amdgpu_kernel void @and_xor_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
97bb:
98  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
99  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
100  %tmp = load i32, i32 addrspace(1)* %gep, align 4
101  %tmp2 = and i32 %tmp, -16776961 ; 0xff0000ff
102  %tmp3 = and i32 %arg1, 16776960 ; 0x00ffff00
103  %tmp4 = xor i32 %tmp2, %tmp3 ; operands share no set bits
104  store i32 %tmp4, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
105  ret void
106}
107
; Test case: (load & 0x00ff00ff) | ((%arg1 & 0xffffff00) | 0xffff0000).
; In the stored word, bytes 3..2 are all ones, byte 1 is byte 1 of %arg1 and
; byte 0 is byte 0 of the loaded value. The check lines below record the
; currently expected output, a scalar and/or followed by a vector and/or,
; rather than the single permute the FIXME asks for.
108; GCN-LABEL: {{^}}and_or_or_and:
109; GCN: s_and_b32 s{{[0-9]+}}, s{{[0-9]+}}, 0xff00
110; GCN: s_or_b32 [[SREG:s[0-9]+]], s{{[0-9]+}}, 0xffff0000
111; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, v{{[0-9]+}}
112; GCN: v_or_b32_e32 v{{[0-9]+}}, [[SREG]], [[VREG]]
113; FIXME: this should have been a "v_perm_b32" with the 0xffff0500 mask.
114define amdgpu_kernel void @and_or_or_and(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
115bb:
116  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
117  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
118  %tmp = load i32, i32 addrspace(1)* %gep, align 4
119  %and = and i32 %tmp, 16711935     ; 0x00ff00ff
120  %tmp1 = and i32 %arg1, 4294967040 ; 0xffffff00
121  %tmp2 = or i32 %tmp1, -65536 ; 0xffff0000: forces bytes 3..2 to all ones
122  %tmp3 = or i32 %tmp2, %and
123  store i32 %tmp3, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
124  ret void
125}
126
; Test case: ((load << 16) | (%arg1 & 0xffff)) & 0xff0000ff.
; In the stored word, byte 3 is byte 1 of the loaded value, bytes 2..1 are
; zero and byte 0 is byte 0 of %arg1. The check lines below expect a
; v_perm_b32 whose last operand is a v register initialized with the mask
; 0x50c0c00.
127; GCN-LABEL: {{^}}and_or_and_shl:
128; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
129; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
130define amdgpu_kernel void @and_or_and_shl(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
131bb:
132  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
133  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
134  %tmp = load i32, i32 addrspace(1)* %gep, align 4
135  %tmp2 = shl i32 %tmp, 16 ; loaded bytes 1..0 move up to bytes 3..2
136  %tmp3 = and i32 %arg1, 65535 ; 0x0000ffff
137  %tmp4 = or i32 %tmp2, %tmp3
138  %and = and i32 %tmp4, 4278190335 ; 0xff0000ff: clears the two middle bytes
139  store i32 %and, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
140  ret void
141}
142
; Test case: (load | 0x00ffff00) & (%arg1 | 0xff0000ff).
; Each or forces the bytes it does not contribute to all ones, so after the
; and bytes 3 and 0 come from the loaded value and bytes 2..1 from %arg1.
; The check lines below expect a v_perm_b32 whose last operand is a v register
; initialized with the mask 0x7020104.
143; GCN-LABEL: {{^}}or_and_or:
144; GCN: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x7020104
145; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
146define amdgpu_kernel void @or_and_or(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
147bb:
148  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
149  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
150  %tmp = load i32, i32 addrspace(1)* %gep, align 4
151  %or1 = or i32 %tmp, 16776960    ; 0x00ffff00
152  %or2 = or i32 %arg1, 4278190335 ; 0xff0000ff
153  %and = and i32 %or1, %or2
154  store i32 %and, i32 addrspace(1)* %gep, align 4 ; written back to the element that was loaded
155  ret void
156}
157
; Test case: the same and/or combination as in and_or_or_and, but bit 15 of
; %arg1 and bit 2 of the loaded value are forced to one first. The combined
; word is stored to %arg[%id]; it is then masked with 0xffff8004 and stored to
; %arg[0]. Every bit kept by that mask is already forced to one, and the check
; lines below expect the second store to use a register materialized directly
; with the constant 0xffff8004.
; The first value is still expected as the and/or sequence, not a permute (see
; the FIXME).
158; GCN-LABEL: {{^}}known_ffff0500:
159; GCN: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
160; GCN: s_and_b32 [[SREG:s[0-9]+]], [[SREG]], 0xff00
161; GCN: s_or_b32 [[SREG]], [[SREG]], 0xffff0000
162; GCN: v_and_b32_e32 [[VREG:v[0-9]+]], 0xff00ff, [[VREG]]
163; GCN: v_or_b32_e32 [[VREG]], [[SREG]], [[VREG]]
164; GCN: store_dword v[{{[0-9:]+}}], [[VREG]]{{$}}
165; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
166; FIXME: this should have been a "v_perm_b32" with the 0xffff0500 mask.
167define amdgpu_kernel void @known_ffff0500(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
168bb:
169  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
170  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
171  %load = load i32, i32 addrspace(1)* %gep, align 4
172  %mask1 = or i32 %arg1, 32768 ; 0x8000
173  %mask2 = or i32 %load, 4 ; forces bit 2 of the loaded value to one
174  %and = and i32 %mask2, 16711935     ; 0x00ff00ff
175  %tmp1 = and i32 %mask1, 4294967040 ; 0xffffff00
176  %tmp2 = or i32 %tmp1, 4294901760   ; 0xffff0000
177  %tmp3 = or i32 %tmp2, %and
178  store i32 %tmp3, i32 addrspace(1)* %gep, align 4 ; first store: combined word to %arg[%id]
179  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
180  store i32 %v, i32 addrspace(1)* %arg, align 4 ; second store: masked word to %arg[0]
181  ret void
182}
183
; Test case: the same shift/and/or combination as in and_or_and_shl, but bit 2
; of %arg1 is forced to one first. The combined word is stored to %arg[%id];
; it is then masked with 0x00ffff04 and stored to %arg[0]. After the
; 0xff0000ff mask bytes 2..1 are zero and bit 2 is one, so the second value is
; always 4.
; The check lines below expect a v_perm_b32 with the mask 0x50c0c00 for the
; first value and a register materialized with the constant 4 for the second
; store.
184; GCN-LABEL: {{^}}known_050c0c00:
185; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x50c0c00
186; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 4{{$}}
187; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
188; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
189define amdgpu_kernel void @known_050c0c00(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
190bb:
191  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
192  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
193  %tmp = load i32, i32 addrspace(1)* %gep, align 4
194  %tmp2 = shl i32 %tmp, 16 ; loaded bytes 1..0 move up to bytes 3..2
195  %mask = or i32 %arg1, 4 ; forces bit 2 of %arg1 to one
196  %tmp3 = and i32 %mask, 65535 ; 0x0000ffff
197  %tmp4 = or i32 %tmp2, %tmp3
198  %and = and i32 %tmp4, 4278190335 ; 0xff0000ff: clears the two middle bytes
199  store i32 %and, i32 addrspace(1)* %gep, align 4 ; first store: combined word to %arg[%id]
200  %v = and i32 %and, 16776964 ; 0x00ffff04
201  store i32 %v, i32 addrspace(1)* %arg, align 4 ; second store: masked word to %arg[0]
202  ret void
203}
204
; Test case: the and/or combination of known_ffff0500 with the roles of the
; two inputs swapped. %arg1 (with bit 2 forced to one) is masked with
; 0x00ff00ff, and the loaded value (with bit 15 forced to one) is masked with
; 0xffffff00 and then or-ed with 0xffff0000. The combined word is stored to
; %arg[%id]; it is then masked with 0xffff8004 and stored to %arg[0].
; The check lines below expect a v_perm_b32 with the mask 0xffff0500 for the
; first value and a register materialized with the constant 0xffff8004 for the
; second store.
205; GCN-LABEL: {{^}}known_ffff8004:
206; GCN-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0xffff0500
207; GCN-DAG: v_mov_b32_e32 [[RES:v[0-9]+]], 0xffff8004
208; GCN: v_perm_b32 v{{[0-9]+}}, {{[vs][0-9]+}}, {{[vs][0-9]+}}, [[MASK]]
209; GCN: store_dword v[{{[0-9:]+}}], [[RES]]{{$}}
210define amdgpu_kernel void @known_ffff8004(i32 addrspace(1)* nocapture %arg, i32 %arg1) {
211bb:
212  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
213  %gep = getelementptr i32, i32 addrspace(1)* %arg, i32 %id ; address of %arg[%id]
214  %load = load i32, i32 addrspace(1)* %gep, align 4
215  %mask1 = or i32 %arg1, 4 ; forces bit 2 of %arg1 to one
216  %mask2 = or i32 %load, 32768 ; 0x8000
217  %and = and i32 %mask1, 16711935     ; 0x00ff00ff
218  %tmp1 = and i32 %mask2, 4294967040 ; 0xffffff00
219  %tmp2 = or i32 %tmp1, 4294901760   ; 0xffff0000
220  %tmp3 = or i32 %tmp2, %and
221  store i32 %tmp3, i32 addrspace(1)* %gep, align 4 ; first store: combined word to %arg[%id]
222  %v = and i32 %tmp3, 4294934532 ; 0xffff8004
223  store i32 %v, i32 addrspace(1)* %arg, align 4 ; second store: masked word to %arg[0]
224  ret void
225}
226
; Intrinsic called at the top of every kernel in this file; its i32 result is
; used as the element index into %arg. Going by its name it yields the
; work-item id in the X dimension -- see the AMDGPU backend documentation.
227declare i32 @llvm.amdgcn.workitem.id.x()
228