; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s

; Scalar f32 llvm.exp. All three run lines share the same expectation: the
; input is multiplied by 0x3fb8aa3b (the f32 bit pattern of log2(e)) and the
; product is fed to v_exp_f32.
define float @v_exp_f32(float %arg0) {
; GCN-LABEL: v_exp_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.exp.f32(float %arg0)
  ret float %result
}

; <2 x float> llvm.exp. The checks expect the vector to be handled one lane
; at a time: two multiplies by 0x3fb8aa3b followed by two v_exp_f32, on every
; run line.
define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
; GCN-LABEL: v_exp_v2f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
  ret <2 x float> %result
}

; <3 x float> llvm.exp. Same per-lane pattern as the narrower f32 cases:
; three multiplies by 0x3fb8aa3b, then three v_exp_f32, on every run line.
define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
; GCN-LABEL: v_exp_v3f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; GCN-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    v_exp_f32_e32 v2, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
  ret <3 x float> %result
}

; <4 x float> llvm.exp. Expected output is four multiplies by 0x3fb8aa3b
; followed by four v_exp_f32, identical across all run lines.
define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
; GCN-LABEL: v_exp_v4f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; GCN-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v3
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    v_exp_f32_e32 v2, v2
; GCN-NEXT:    v_exp_f32_e32 v3, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
  ret <4 x float> %result
}

; Scalar f16 llvm.exp. The expectations differ per run line:
;  - default target (SI checks): the argument goes through
;    v_cvt_f16_f32 / v_cvt_f32_f16 and the exp is computed in f32 with the
;    0x3fb8aa3b multiplier;
;  - fiji and gfx900 (VI / GFX9 checks): v_mul_f16 by 0x3dc5 (the f16 bit
;    pattern of log2(e)) followed by v_exp_f16.
define half @v_exp_f16(half %arg0) {
; SI-LABEL: v_exp_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; VI-NEXT:    v_exp_f16_e32 v0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; GFX9-NEXT:    v_exp_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call half @llvm.exp.f16(half %arg0)
  ret half %result
}

; <2 x half> llvm.exp. Expectations per run line:
;  - default target (SI checks): both lanes are converted through
;    v_cvt_f16_f32 / v_cvt_f32_f16 and evaluated in f32;
;  - fiji (VI checks): the low half uses plain v_mul_f16 / v_exp_f16, the high
;    half uses the SDWA forms, and the two results are merged with v_or_b32;
;  - gfx900 (GFX9 checks): one v_pk_mul_f16 scales both halves, then each half
;    gets its own v_exp_f16 and the pair is rebuilt with v_pack_b32_f16.
define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
; SI-LABEL: v_exp_v2f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v2f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, 0x3dc5
; VI-NEXT:    v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 v0, v0
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
; GFX9-NEXT:    v_exp_f16_e32 v1, v0
; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
  ret <2 x half> %result
}

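; NOTE(review): the <3 x half> case below is commented out and has no
; generated checks; the reason is not recorded in this file.
; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
;   ret <3 x half> %result
; }
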
; <4 x half> llvm.exp. Expectations per run line:
;  - default target (SI checks): all four lanes are converted through
;    v_cvt_f16_f32 / v_cvt_f32_f16 and evaluated in f32;
;  - fiji (VI checks): for each 32-bit register the low half uses plain
;    v_mul_f16 / v_exp_f16 and the high half uses the SDWA forms, merged with
;    v_or_b32;
;  - gfx900 (GFX9 checks): the multiplies are split the same way (plain plus
;    SDWA), every lane gets a plain v_exp_f16, and each register pair is
;    rebuilt with v_pack_b32_f16.
define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
; SI-LABEL: v_exp_v4f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; SI-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
; SI-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v3
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    v_exp_f32_e32 v2, v2
; SI-NEXT:    v_exp_f32_e32 v3, v3
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v4f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v3, 0x3dc5
; VI-NEXT:    v_mul_f16_e32 v2, 0x3dc5, v1
; VI-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_e32 v4, 0x3dc5, v0
; VI-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 v2, v2
; VI-NEXT:    v_exp_f16_e32 v4, v4
; VI-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_or_b32_e32 v0, v4, v0
; VI-NEXT:    v_or_b32_e32 v1, v2, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
; GFX9-NEXT:    v_mul_f16_e32 v2, 0x3dc5, v1
; GFX9-NEXT:    v_mul_f16_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_mul_f16_e32 v3, 0x3dc5, v0
; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_exp_f16_e32 v2, v2
; GFX9-NEXT:    v_exp_f16_e32 v3, v3
; GFX9-NEXT:    v_exp_f16_e32 v0, v0
; GFX9-NEXT:    v_exp_f16_e32 v1, v1
; GFX9-NEXT:    v_pack_b32_f16 v0, v3, v0
; GFX9-NEXT:    v_pack_b32_f16 v1, v2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
  ret <4 x half> %result
}

; Declarations of the llvm.exp intrinsic overloads for the float and half
; element types and vector widths named in this file.
declare float @llvm.exp.f32(float)
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
declare <3 x float> @llvm.exp.v3f32(<3 x float>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)

declare half @llvm.exp.f16(half)
declare <2 x half> @llvm.exp.v2f16(<2 x half>)
declare <3 x half> @llvm.exp.v3f16(<3 x half>)
declare <4 x half> @llvm.exp.v4f16(<4 x half>)
