; RUN: llc -march=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GCN %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefix=GFX9 %s

; Kernel variant: builds <2 x i16> with element 0 = constant 0 and
; element 1 = the kernel argument %a, then stores the vector's i32 bit pattern.
; The default-target run must contain a scalar left shift (S_LSHL_B32) in the
; isel output for this function.
; GCN-LABEL: name:            uniform_vec_0_i16
; GCN: S_LSHL_B32
define amdgpu_kernel void @uniform_vec_0_i16(i32 addrspace(1)* %out, i16 %a) {
  %tmp = insertelement <2 x i16> undef, i16 0, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 %a, i32 1
  %val = bitcast <2 x i16> %vec to i32
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Non-kernel variant of the {0, %a} pack: %a is a plain function argument and
; the packed i32 is returned instead of stored.
; The default-target run must contain the vector shift V_LSHLREV_B32_e64.
; GCN-LABEL: name:            divergent_vec_0_i16
; GCN: V_LSHLREV_B32_e64
define i32 @divergent_vec_0_i16(i16 %a) {
  %tmp = insertelement <2 x i16> undef, i16 0, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 %a, i32 1
  %val = bitcast <2 x i16> %vec to i32
  ret i32 %val
}

; Kernel variant: builds <2 x i16> with element 0 = the kernel argument %a and
; element 1 = constant 0, then stores the vector's i32 bit pattern.
; The default-target run must contain a scalar AND (S_AND_B32).
; GCN-LABEL: name:            uniform_vec_i16_0
; GCN: S_AND_B32
define amdgpu_kernel void @uniform_vec_i16_0(i32 addrspace(1)* %out, i16 %a) {
  %tmp = insertelement <2 x i16> undef, i16 %a, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 0, i32 1
  %val = bitcast <2 x i16> %vec to i32
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Non-kernel variant of the {%a, 0} pack: %a is a plain function argument and
; the packed i32 is returned.
; The default-target run must contain the vector AND V_AND_B32_e64.
; GCN-LABEL: name:            divergent_vec_i16_0
; GCN: V_AND_B32_e64
define i32 @divergent_vec_i16_0(i16 %a) {
  %tmp = insertelement <2 x i16> undef, i16 %a, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 0, i32 1
  %val = bitcast <2 x i16> %vec to i32
  ret i32 %val
}

; Half-precision kernel variant: builds <2 x half> with element 0 = the kernel
; argument %a and element 1 = 0.0, then stores the vector's bits as a float.
; The default-target run must contain a scalar AND (S_AND_B32).
; GCN-LABEL: name:            uniform_vec_f16_0
; GCN: S_AND_B32
define amdgpu_kernel void @uniform_vec_f16_0(float addrspace(1)* %out, half %a) {
  %tmp = insertelement <2 x half> undef, half %a, i32 0
  %vec = insertelement <2 x half> %tmp, half 0.0, i32 1
  %val = bitcast <2 x half> %vec to float
  store float %val, float addrspace(1)* %out, align 4
  ret void
}

; Non-kernel half-precision variant of the {%a, 0.0} pack; the packed bits are
; returned as a float.
; The two runs expect different selections:
;   - default target: a V_CVT_F16_F32_e64 of %0 followed by a COPY of %1;
;   - gfx900: a vector AND (V_AND_B32_e64).
; GCN-LABEL: name:            divergent_vec_f16_0
; GCN: V_CVT_F16_F32_e64 0, %0
; GCN: COPY %1

; GFX9-LABEL: name:            divergent_vec_f16_0
; GFX9: V_AND_B32_e64
define float @divergent_vec_f16_0(half %a) {
  %tmp = insertelement <2 x half> undef, half %a, i32 0
  %vec = insertelement <2 x half> %tmp, half 0.0, i32 1
  %val = bitcast <2 x half> %vec to float
  ret float %val
}

; Kernel variant packing the low 16 bits of two values: both operands come from
; volatile i32 loads through addrspace(4) pointers and are truncated to i16.
; The packed i32 is consumed by inline asm with an "s" constraint.
; Expected selection:
;   - default target: AND with 65535, left shift by 16, then OR, all scalar;
;   - gfx900: a single S_PACK_LL_B32_B16.
; NOTE(review): attribute group #0 used on the asm call is not defined in the
; visible part of this file - confirm it is defined elsewhere or intentionally empty.
; GCN-LABEL: name:            uniform_vec_i16_LL
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]]
; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]]

; GFX9-LABEL: name:            uniform_vec_i16_LL
; GFX9: S_PACK_LL_B32_B16
define amdgpu_kernel void @uniform_vec_i16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) {
  %val0 = load volatile i32, i32 addrspace(4)* %in0
  %val1 = load volatile i32, i32 addrspace(4)* %in1
  %lo = trunc i32 %val0 to i16
  %hi = trunc i32 %val1 to i16
  %vec.0 = insertelement <2 x i16> undef, i16 %lo, i32 0
  %vec.1 = insertelement <2 x i16> %vec.0, i16 %hi, i32 1
  %vec.i32 = bitcast <2 x i16> %vec.1 to i32
  call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
  ret void
}

; Non-kernel variant packing two i16 function arguments: %a goes to element 0,
; %b to element 1, and the packed i32 is returned.
; Expected selection:
;   - default target: %1 shifted left by 16 (V_LSHLREV), %0 masked with 65535
;     (V_AND), and the two combined with V_OR;
;   - gfx900: a 65535 mask via V_AND followed by a fused V_LSHL_OR_B32 with
;     shift amount 16.
; GCN-LABEL: name:            divergent_vec_i16_LL
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]], %1, implicit $exec
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %0, killed %[[IMM]], implicit $exec
; GCN: V_OR_B32_e64 killed %[[AND]], killed %[[SHL]], implicit $exec

; GFX9-LABEL: name:            divergent_vec_i16_LL
; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535
; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]]
; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]]
define i32 @divergent_vec_i16_LL(i16 %a, i16 %b) {
  %tmp = insertelement <2 x i16> undef, i16 %a, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 %b, i32 1
  %val = bitcast <2 x i16> %vec to i32
  ret i32 %val
}

; Kernel variant packing a low half with a high half: element 0 is the i16
; argument %a, element 1 is bits 31:16 of %b (lshr by 16, then trunc).
; Expected selection:
;   - default target: one S_AND with 65535 and one S_AND with -65536 (matched
;     in either order), combined with S_OR;
;   - gfx900: a single S_PACK_LH_B32_B16.
; GCN-LABEL: name:            uniform_vec_i16_LH
; GCN-DAG: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN-DAG: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN-DAG: %[[NEG:[0-9]+]]:sreg_32 = S_MOV_B32 -65536
; GCN-DAG: %[[ANDN:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[NEG]]
; GCN: S_OR_B32 killed %[[AND]], killed %[[ANDN]]

; GFX9-LABEL: name:            uniform_vec_i16_LH
; GFX9: S_PACK_LH_B32_B16
define amdgpu_kernel void @uniform_vec_i16_LH(i32 addrspace(1)* %out, i16 %a, i32 %b) {
  %shift = lshr i32 %b, 16
  %tr = trunc i32 %shift to i16
  %tmp = insertelement <2 x i16> undef, i16 %a, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 %tr, i32 1
  %val = bitcast <2 x i16> %vec to i32
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Non-kernel variant of the low/high pack: element 0 is the i16 argument %a,
; element 1 is bits 31:16 of %b; the packed i32 is returned.
; Only the default-target run is checked here: it expects a 65535 constant
; feeding a bitfield insert (V_BFI_B32_e64) as its first operand.
; GCN-LABEL: name:            divergent_vec_i16_LH
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: V_BFI_B32_e64 killed %[[IMM]]
define i32 @divergent_vec_i16_LH(i16 %a, i32 %b) {
  %shift = lshr i32 %b, 16
  %tr = trunc i32 %shift to i16
  %tmp = insertelement <2 x i16> undef, i16 %a, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 %tr, i32 1
  %val = bitcast <2 x i16> %vec to i32
  ret i32 %val
}

; Kernel variant packing two high halves: element 0 is bits 31:16 of %a and
; element 1 is bits 31:16 of %b (each via lshr by 16, then trunc).
; Expected selection:
;   - default target: S_LSHR by 16, S_AND with -65536, combined with S_OR;
;   - gfx900: a single S_PACK_HH_B32_B16.
; GCN-LABEL: name:            uniform_vec_i16_HH
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHR:[0-9]+]]:sreg_32 = S_LSHR_B32 killed %{{[0-9]+}}, killed %[[SHIFT]]
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN: S_OR_B32 killed %[[SHR]], killed %[[AND]]

; GFX9-LABEL: name:            uniform_vec_i16_HH
; GFX9: S_PACK_HH_B32_B16
define amdgpu_kernel void @uniform_vec_i16_HH(i32 addrspace(1)* %out, i32 %a, i32 %b) {
  %shift_a = lshr i32 %a, 16
  %tr_a = trunc i32 %shift_a to i16
  %shift_b = lshr i32 %b, 16
  %tr_b = trunc i32 %shift_b to i16
  %tmp = insertelement <2 x i16> undef, i16 %tr_a, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 %tr_b, i32 1
  %val = bitcast <2 x i16> %vec to i32
  store i32 %val, i32 addrspace(1)* %out, align 4
  ret void
}

; Non-kernel variant packing two high halves: element 0 is bits 31:16 of %a,
; element 1 is bits 31:16 of %b; the packed i32 is returned.
; Expected selection:
;   - default target: V_LSHRREV of %0, V_AND of %1 with -65536, then V_OR;
;   - gfx900: V_LSHRREV of %0 by an inline 16, then a fused V_AND_OR_B32 of %1
;     with the -65536 mask.
; GCN-LABEL: name:            divergent_vec_i16_HH
; GCN: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 killed %{{[0-9]+}}, %0, implicit $exec
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 -65536
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 %1, killed %[[IMM]], implicit $exec
; GCN: V_OR_B32_e64 killed %[[SHR]], killed %[[AND]], implicit $exec

; GFX9-LABEL: name:            divergent_vec_i16_HH
; GFX9: %[[SHR:[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 16, %0
; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -65536, implicit $exec
; GFX9: V_AND_OR_B32_e64 %1, killed %[[IMM]], killed %[[SHR]]
define i32 @divergent_vec_i16_HH(i32 %a, i32 %b) {
  %shift_a = lshr i32 %a, 16
  %tr_a = trunc i32 %shift_a to i16
  %shift_b = lshr i32 %b, 16
  %tr_b = trunc i32 %shift_b to i16
  %tmp = insertelement <2 x i16> undef, i16 %tr_a, i32 0
  %vec = insertelement <2 x i16> %tmp, i16 %tr_b, i32 1
  %val = bitcast <2 x i16> %vec to i32
  ret i32 %val
}

; Half-precision kernel variant of the low/low pack: two volatile i32 loads
; through addrspace(4) pointers are truncated to i16, bitcast to half, and
; packed into <2 x half>. The packed i32 is consumed by inline asm with an "s"
; constraint.
; Expected selection:
;   - default target: AND with 65535, left shift by 16, then OR, all scalar;
;   - gfx900: a single S_PACK_LL_B32_B16.
; NOTE(review): attribute group #0 used on the asm call is not defined in the
; visible part of this file - confirm it is defined elsewhere or intentionally empty.
; GCN-LABEL: name:            uniform_vec_f16_LL
; GCN: %[[IMM:[0-9]+]]:sreg_32 = S_MOV_B32 65535
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %{{[0-9]+}}, killed %[[IMM]]
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:sreg_32 = S_LSHL_B32 killed %{{[0-9]+}}, killed %[[SHIFT]]
; GCN: S_OR_B32 killed %[[AND]], killed %[[SHL]]

; GFX9-LABEL: name:            uniform_vec_f16_LL
; GFX9: S_PACK_LL_B32_B16
define amdgpu_kernel void @uniform_vec_f16_LL(i32 addrspace(4)* %in0, i32 addrspace(4)* %in1) {
  %val0 = load volatile i32, i32 addrspace(4)* %in0
  %val1 = load volatile i32, i32 addrspace(4)* %in1
  %lo.i = trunc i32 %val0 to i16
  %hi.i = trunc i32 %val1 to i16
  %lo = bitcast i16 %lo.i to half
  %hi = bitcast i16 %hi.i to half
  %vec.0 = insertelement <2 x half> undef, half %lo, i32 0
  %vec.1 = insertelement <2 x half> %vec.0, half %hi, i32 1
  %vec.i32 = bitcast <2 x half> %vec.1 to i32

  call void asm sideeffect "; use $0", "s"(i32 %vec.i32) #0
  ret void
}

; Non-kernel half-precision variant of the low/low pack: the half arguments %a
; and %b become elements 0 and 1, and the packed bits are returned as a float.
; Expected selection:
;   - default target: a V_LSHLREV by 16 whose result is combined via V_OR;
;   - gfx900: a 65535 mask via V_AND followed by a fused V_LSHL_OR_B32 with
;     shift amount 16.
; GCN-LABEL: name:            divergent_vec_f16_LL
; GCN: %[[SHIFT:[0-9]+]]:sreg_32 = S_MOV_B32 16
; GCN: %[[SHL:[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 killed %[[SHIFT]]
; GCN: V_OR_B32_e64 killed %{{[0-9]+}}, killed %[[SHL]], implicit $exec

; GFX9-LABEL: name:            divergent_vec_f16_LL
; GFX9: %[[IMM:[0-9]+]]:vgpr_32 = V_MOV_B32_e32 65535
; GFX9: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[IMM]]
; GFX9: V_LSHL_OR_B32_e64 %{{[0-9]+}}, 16, killed %[[AND]]
define float @divergent_vec_f16_LL(half %a, half %b) {
  %tmp = insertelement <2 x half> undef, half %a, i32 0
  %vec = insertelement <2 x half> %tmp, half %b, i32 1
  %val = bitcast <2 x half> %vec to float
  ret float %val
}