; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=+fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
; RUN: llc -march=amdgcn -mcpu=fiji -mattr=-fp16-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s

; Make sure fdiv is promoted to f32.

; On SI (no f16 division support) the f16 fdiv is promoted to the full
; f32 div_scale/fma/div_fmas/div_fixup sequence; on VI it uses the
; rcp_f32 fast path plus v_div_fixup_f16.
; GCN-LABEL: {{^}}v_fdiv_f16:
; SI:     v_cvt_f32_f16
; SI:     v_cvt_f32_f16
; SI:     v_div_scale_f32
; SI-DAG: v_div_scale_f32
; SI-DAG: v_rcp_f32
; SI:     v_fma_f32
; SI:     v_fma_f32
; SI:     v_mul_f32
; SI:     v_fma_f32
; SI:     v_fma_f32
; SI:     v_fma_f32
; SI:     v_div_fmas_f32
; SI:     v_div_fixup_f32
; SI:     v_cvt_f16_f32

; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI-DAG: v_cvt_f32_f16_e32 [[CVT_LHS:v[0-9]+]], [[LHS]]
; VI-DAG: v_cvt_f32_f16_e32 [[CVT_RHS:v[0-9]+]], [[RHS]]

; VI-DAG: v_rcp_f32_e32 [[RCP_RHS:v[0-9]+]], [[CVT_RHS]]
; VI: v_mul_f32_e32 [[MUL:v[0-9]+]], [[RCP_RHS]], [[CVT_LHS]]
; VI: v_cvt_f16_f32_e32 [[CVT_BACK:v[0-9]+]], [[MUL]]
; VI: v_div_fixup_f16 [[RESULT:v[0-9]+]], [[CVT_BACK]], [[RHS]], [[LHS]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16(
    half addrspace(1)* %r,
    half addrspace(1)* %a,
    half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; fdiv of 1.0 by a value should select a single v_rcp_f16 on VI.
; GCN-LABEL: {{^}}v_rcp_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; 1.0 / fabs(x) should fold the source modifier into v_rcp_f16_e64.
; (Fixed malformed FileCheck use: "[RESULT]]" -> "[[RESULT]]".)
; GCN-LABEL: {{^}}v_rcp_f16_abs:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], |[[VAL]]|
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_abs(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.abs = call half @llvm.fabs.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.abs
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; The arcp flag on 1.0 / x still selects the same v_rcp_f16.
; GCN-LABEL: {{^}}v_rcp_f16_arcp:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half 1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; -1.0 / x should fold the negate into v_rcp_f16_e64's source modifier.
; (Fixed malformed FileCheck use: "[RESULT]]" -> "[[RESULT]]".)
; GCN-LABEL: {{^}}v_rcp_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rcp_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half -1.0, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; 1.0 / sqrt(x) should combine into a single v_rsq_f16.
; (Fixed malformed FileCheck use: "[RESULT]]" -> "[[RESULT]]".)
; GCN-LABEL: {{^}}v_rsq_f16:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_rsq_f16_e32 [[RESULT:v[0-9]+]], [[VAL]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half 1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; -1.0 / sqrt(x) must NOT form v_rsq_f16; it stays sqrt + negated rcp.
; (Fixed malformed FileCheck use: "[RESULT]]" -> "[[RESULT]]".)
; GCN-LABEL: {{^}}v_rsq_f16_neg:
; VI: flat_load_ushort [[VAL:v[0-9]+]]
; VI-NOT: [[VAL]]
; VI: v_sqrt_f16_e32 [[SQRT:v[0-9]+]], [[VAL]]
; VI-NEXT: v_rcp_f16_e64 [[RESULT:v[0-9]+]], -[[SQRT]]
; VI-NOT: [[RESULT]]
; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_rsq_f16_neg(half addrspace(1)* %r, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %b.sqrt = call half @llvm.sqrt.f16(half %b.val)
  %r.val = fdiv half -1.0, %b.sqrt
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; With the arcp flag a general fdiv lowers to rcp + mul instead of the
; full division expansion.
; GCN-LABEL: {{^}}v_fdiv_f16_arcp:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_arcp(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #0 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv arcp half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

; With "unsafe-fp-math"="true" (attribute #2) a plain fdiv also lowers
; to rcp + mul.
; GCN-LABEL: {{^}}v_fdiv_f16_unsafe:
; VI: flat_load_ushort [[LHS:v[0-9]+]]
; VI: flat_load_ushort [[RHS:v[0-9]+]]

; VI: v_rcp_f16_e32 [[RCP:v[0-9]+]], [[RHS]]
; VI: v_mul_f16_e32 [[RESULT:v[0-9]+]], [[RCP]], [[LHS]]

; VI: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, [[RESULT]]
define void @v_fdiv_f16_unsafe(half addrspace(1)* %r, half addrspace(1)* %a, half addrspace(1)* %b) #2 {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %gep.a = getelementptr inbounds half, half addrspace(1)* %a, i64 %tid.ext
  %gep.b = getelementptr inbounds half, half addrspace(1)* %b, i64 %tid.ext
  %gep.r = getelementptr inbounds half, half addrspace(1)* %r, i64 %tid.ext
  %a.val = load volatile half, half addrspace(1)* %gep.a
  %b.val = load volatile half, half addrspace(1)* %gep.b
  %r.val = fdiv half %a.val, %b.val
  store half %r.val, half addrspace(1)* %gep.r
  ret void
}

declare i32 @llvm.amdgcn.workitem.id.x() #1
declare half @llvm.sqrt.f16(half) #1
declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }