1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
3;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
4;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s
5
; Scalar f32 case: calls llvm.exp.f32 on %arg0 and returns the result.
; All three llc configurations share the GCN-prefixed checks, so the same
; sequence is expected on each: a v_mul_f32 by the literal 0x3fb8aa3b,
; then v_exp_f32, then the return via s_setpc_b64.
; 0x3fb8aa3b decodes as an IEEE f32 of about 1.442695; presumably this is
; log2(e), with v_exp_f32 acting as a base-2 exponent (confirm against the
; AMDGPU ISA documentation).
6define float @v_exp_f32(float %arg0) {
7; GCN-LABEL: v_exp_f32:
8; GCN:       ; %bb.0:
9; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
10; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
11; GCN-NEXT:    v_exp_f32_e32 v0, v0
12; GCN-NEXT:    s_setpc_b64 s[30:31]
13  %result = call float @llvm.exp.f32(float %arg0)
14  ret float %result
15}
16
; Two-element f32 vector case: calls llvm.exp.v2f32 and returns the result.
; The shared GCN checks expect the constant 0x3fb8aa3b to be materialized
; once into s4 (s_mov_b32) and reused by both multiplies, followed by one
; v_exp_f32 per element (v0 and v1).
17define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
18; GCN-LABEL: v_exp_v2f32:
19; GCN:       ; %bb.0:
20; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GCN-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
22; GCN-NEXT:    v_mul_f32_e32 v0, s4, v0
23; GCN-NEXT:    v_mul_f32_e32 v1, s4, v1
24; GCN-NEXT:    v_exp_f32_e32 v0, v0
25; GCN-NEXT:    v_exp_f32_e32 v1, v1
26; GCN-NEXT:    s_setpc_b64 s[30:31]
27  %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
28  ret <2 x float> %result
29}
30
; Three-element f32 vector case: calls llvm.exp.v3f32 and returns the result.
; Same shape as the two-element test, extended to v0..v2: one s_mov_b32 of
; 0x3fb8aa3b into s4, three v_mul_f32 using s4, then three v_exp_f32.
31define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
32; GCN-LABEL: v_exp_v3f32:
33; GCN:       ; %bb.0:
34; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
35; GCN-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
36; GCN-NEXT:    v_mul_f32_e32 v0, s4, v0
37; GCN-NEXT:    v_mul_f32_e32 v1, s4, v1
38; GCN-NEXT:    v_mul_f32_e32 v2, s4, v2
39; GCN-NEXT:    v_exp_f32_e32 v0, v0
40; GCN-NEXT:    v_exp_f32_e32 v1, v1
41; GCN-NEXT:    v_exp_f32_e32 v2, v2
42; GCN-NEXT:    s_setpc_b64 s[30:31]
43  %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
44  ret <3 x float> %result
45}
46
; Four-element f32 vector case: calls llvm.exp.v4f32 and returns the result.
; The shared GCN checks expect one s_mov_b32 of 0x3fb8aa3b into s4, four
; v_mul_f32 using s4 (v0..v3), then four v_exp_f32 on the same registers.
47define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
48; GCN-LABEL: v_exp_v4f32:
49; GCN:       ; %bb.0:
50; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51; GCN-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
52; GCN-NEXT:    v_mul_f32_e32 v0, s4, v0
53; GCN-NEXT:    v_mul_f32_e32 v1, s4, v1
54; GCN-NEXT:    v_mul_f32_e32 v2, s4, v2
55; GCN-NEXT:    v_mul_f32_e32 v3, s4, v3
56; GCN-NEXT:    v_exp_f32_e32 v0, v0
57; GCN-NEXT:    v_exp_f32_e32 v1, v1
58; GCN-NEXT:    v_exp_f32_e32 v2, v2
59; GCN-NEXT:    v_exp_f32_e32 v3, v3
60; GCN-NEXT:    s_setpc_b64 s[30:31]
61  %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
62  ret <4 x float> %result
63}
64
; Scalar f16 case: calls llvm.exp.f16 on %arg0 and returns the result.
; The expected code differs per llc configuration:
;  - default (SI prefix): no f16 arithmetic is checked; the value goes
;    through v_cvt_f16_f32 / v_cvt_f32_f16, then the same f32 multiply by
;    0x3fb8aa3b and v_exp_f32 as the f32 test.
;  - fiji (VI prefix) and gfx900 (GFX9 prefix): a v_mul_f16 by the literal
;    0x3dc5 followed by v_exp_f16.
; 0x3dc5 decodes as an IEEE half of about 1.4424; presumably the half
; precision rounding of log2(e) (confirm).
65define half @v_exp_f16(half %arg0) {
66; SI-LABEL: v_exp_f16:
67; SI:       ; %bb.0:
68; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
70; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
71; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
72; SI-NEXT:    v_exp_f32_e32 v0, v0
73; SI-NEXT:    s_setpc_b64 s[30:31]
74;
75; VI-LABEL: v_exp_f16:
76; VI:       ; %bb.0:
77; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
78; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
79; VI-NEXT:    v_exp_f16_e32 v0, v0
80; VI-NEXT:    s_setpc_b64 s[30:31]
81;
82; GFX9-LABEL: v_exp_f16:
83; GFX9:       ; %bb.0:
84; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
85; GFX9-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
86; GFX9-NEXT:    v_exp_f16_e32 v0, v0
87; GFX9-NEXT:    s_setpc_b64 s[30:31]
88  %result = call half @llvm.exp.f16(half %arg0)
89  ret half %result
90}
91
; Two-element f16 vector case: calls llvm.exp.v2f16 and returns the result.
; The expected code differs per llc configuration:
;  - default (SI prefix): each element (v0, v1) goes through the
;    v_cvt_f16_f32 / v_cvt_f32_f16 pair and is then multiplied by
;    0x3fb8aa3b (held in s4) and passed to v_exp_f32.
;  - fiji (VI prefix): both halves live in v0. The high half is multiplied
;    and exponentiated with SDWA forms (src0_sel:WORD_1 on the multiply,
;    dst_sel:WORD_1 on the exp), the low half with the plain e32 forms, and
;    the two results are merged with v_or_b32. The constant 0x3dc5 is copied
;    from s4 into v1 for use as the SDWA multiply operand.
;  - gfx900 (GFX9 prefix): one v_pk_mul_f16 multiplies both halves by s4,
;    then two v_exp_f16 (the second reading WORD_1 via SDWA) are repacked
;    with v_pack_b32_f16.
92define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
93; SI-LABEL: v_exp_v2f16:
94; SI:       ; %bb.0:
95; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
97; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
98; SI-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
99; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
100; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
101; SI-NEXT:    v_mul_f32_e32 v0, s4, v0
102; SI-NEXT:    v_mul_f32_e32 v1, s4, v1
103; SI-NEXT:    v_exp_f32_e32 v0, v0
104; SI-NEXT:    v_exp_f32_e32 v1, v1
105; SI-NEXT:    s_setpc_b64 s[30:31]
106;
107; VI-LABEL: v_exp_v2f16:
108; VI:       ; %bb.0:
109; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
110; VI-NEXT:    s_movk_i32 s4, 0x3dc5
111; VI-NEXT:    v_mov_b32_e32 v1, s4
112; VI-NEXT:    v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
113; VI-NEXT:    v_mul_f16_e32 v0, s4, v0
114; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
115; VI-NEXT:    v_exp_f16_e32 v0, v0
116; VI-NEXT:    v_or_b32_e32 v0, v0, v1
117; VI-NEXT:    s_setpc_b64 s[30:31]
118;
119; GFX9-LABEL: v_exp_v2f16:
120; GFX9:       ; %bb.0:
121; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
122; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
123; GFX9-NEXT:    v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
124; GFX9-NEXT:    v_exp_f16_e32 v1, v0
125; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
126; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
127; GFX9-NEXT:    s_setpc_b64 s[30:31]
128  %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
129  ret <2 x half> %result
130}
131
132; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
133;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
134;   ret <3 x half> %result
135; }
136
; Four-element f16 vector case: calls llvm.exp.v4f16 and returns the result.
; The expected code differs per llc configuration:
;  - default (SI prefix): each of v0..v3 goes through the v_cvt_f16_f32 /
;    v_cvt_f32_f16 pair, is multiplied by 0x3fb8aa3b (held in s4) and passed
;    to v_exp_f32.
;  - fiji (VI prefix): the four halves are packed in v0 and v1. Low halves
;    use the plain e32 multiply/exp, high halves use the SDWA forms
;    (src0_sel:WORD_1 on the multiply, dst_sel:WORD_1 on the exp), and each
;    register pair is merged with v_or_b32. 0x3dc5 is copied from s4 into v3
;    for the SDWA multiplies.
;  - gfx900 (GFX9 prefix): s4 is used directly as the SDWA multiply operand
;    (no v_mov copy is checked), all four exps are plain v_exp_f16_e32, and
;    the results are repacked with two v_pack_b32_f16. Unlike the
;    two-element test, no v_pk_mul_f16 appears in the checked output.
137define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
138; SI-LABEL: v_exp_v4f16:
139; SI:       ; %bb.0:
140; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
142; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
143; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
144; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
145; SI-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
146; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
147; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
148; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
149; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
150; SI-NEXT:    v_mul_f32_e32 v0, s4, v0
151; SI-NEXT:    v_mul_f32_e32 v1, s4, v1
152; SI-NEXT:    v_mul_f32_e32 v2, s4, v2
153; SI-NEXT:    v_mul_f32_e32 v3, s4, v3
154; SI-NEXT:    v_exp_f32_e32 v0, v0
155; SI-NEXT:    v_exp_f32_e32 v1, v1
156; SI-NEXT:    v_exp_f32_e32 v2, v2
157; SI-NEXT:    v_exp_f32_e32 v3, v3
158; SI-NEXT:    s_setpc_b64 s[30:31]
159;
160; VI-LABEL: v_exp_v4f16:
161; VI:       ; %bb.0:
162; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
163; VI-NEXT:    s_movk_i32 s4, 0x3dc5
164; VI-NEXT:    v_mov_b32_e32 v3, s4
165; VI-NEXT:    v_mul_f16_e32 v2, s4, v1
166; VI-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
167; VI-NEXT:    v_mul_f16_e32 v4, s4, v0
168; VI-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
169; VI-NEXT:    v_exp_f16_e32 v2, v2
170; VI-NEXT:    v_exp_f16_e32 v4, v4
171; VI-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
172; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
173; VI-NEXT:    v_or_b32_e32 v0, v4, v0
174; VI-NEXT:    v_or_b32_e32 v1, v2, v1
175; VI-NEXT:    s_setpc_b64 s[30:31]
176;
177; GFX9-LABEL: v_exp_v4f16:
178; GFX9:       ; %bb.0:
179; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
180; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
181; GFX9-NEXT:    v_mul_f16_e32 v2, s4, v1
182; GFX9-NEXT:    v_mul_f16_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
183; GFX9-NEXT:    v_mul_f16_e32 v3, s4, v0
184; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
185; GFX9-NEXT:    v_exp_f16_e32 v2, v2
186; GFX9-NEXT:    v_exp_f16_e32 v3, v3
187; GFX9-NEXT:    v_exp_f16_e32 v0, v0
188; GFX9-NEXT:    v_exp_f16_e32 v1, v1
189; GFX9-NEXT:    v_pack_b32_f16 v0, v3, v0
190; GFX9-NEXT:    v_pack_b32_f16 v1, v2, v1
191; GFX9-NEXT:    s_setpc_b64 s[30:31]
192  %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
193  ret <4 x half> %result
194}
195
; Declarations of the llvm.exp intrinsic overloads called by the tests in
; this file: scalar and 2/3/4-element vectors of float.
196declare float @llvm.exp.f32(float)
197declare <2 x float> @llvm.exp.v2f32(<2 x float>)
198declare <3 x float> @llvm.exp.v3f32(<3 x float>)
199declare <4 x float> @llvm.exp.v4f32(<4 x float>)
200
; Half-precision overloads. In the visible part of this file the v3f16
; overload is referenced only by the commented-out v_exp_v3f16 test.
201declare half @llvm.exp.f16(half)
202declare <2 x half> @llvm.exp.v2f16(<2 x half>)
203declare <3 x half> @llvm.exp.v3f16(<3 x half>)
204declare <4 x half> @llvm.exp.v4f16(<4 x half>)
205
206