1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
6; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7; RUN: llc -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s
8
; Scalar f32 case of the llvm.pow intrinsic.
; On every RUN line the expected code is a three-instruction expansion:
; v_log_f32 on %x, a legacy multiply of that result by %y (v_mul_legacy_f32,
; spelled v_mul_dx9_zero_f32 in the gfx1100 checks), then v_exp_f32.
9define float @v_pow_f32(float %x, float %y) {
10; GFX6-LABEL: v_pow_f32:
11; GFX6:       ; %bb.0:
12; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
13; GFX6-NEXT:    v_log_f32_e32 v0, v0
14; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
15; GFX6-NEXT:    v_exp_f32_e32 v0, v0
16; GFX6-NEXT:    s_setpc_b64 s[30:31]
17;
18; GFX8-LABEL: v_pow_f32:
19; GFX8:       ; %bb.0:
20; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
21; GFX8-NEXT:    v_log_f32_e32 v0, v0
22; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
23; GFX8-NEXT:    v_exp_f32_e32 v0, v0
24; GFX8-NEXT:    s_setpc_b64 s[30:31]
25;
26; GFX9-LABEL: v_pow_f32:
27; GFX9:       ; %bb.0:
28; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
29; GFX9-NEXT:    v_log_f32_e32 v0, v0
30; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
31; GFX9-NEXT:    v_exp_f32_e32 v0, v0
32; GFX9-NEXT:    s_setpc_b64 s[30:31]
33;
34; GFX90A-LABEL: v_pow_f32:
35; GFX90A:       ; %bb.0:
36; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
37; GFX90A-NEXT:    v_log_f32_e32 v0, v0
38; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
39; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
40; GFX90A-NEXT:    s_setpc_b64 s[30:31]
41;
42; GFX10-LABEL: v_pow_f32:
43; GFX10:       ; %bb.0:
44; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
45; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
46; GFX10-NEXT:    v_log_f32_e32 v0, v0
47; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
48; GFX10-NEXT:    v_exp_f32_e32 v0, v0
49; GFX10-NEXT:    s_setpc_b64 s[30:31]
50;
51; GFX11-LABEL: v_pow_f32:
52; GFX11:       ; %bb.0:
53; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
55; GFX11-NEXT:    v_log_f32_e32 v0, v0
56; GFX11-NEXT:    s_waitcnt_depctr 0xfff
57; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
58; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
59; GFX11-NEXT:    v_exp_f32_e32 v0, v0
60; GFX11-NEXT:    s_setpc_b64 s[30:31]
61  %pow = call float @llvm.pow.f32(float %x, float %y)
62  ret float %pow
63}
64
; <2 x float> case of the llvm.pow intrinsic.
; The checks expect the same log / legacy-multiply / exp sequence applied to
; each element separately (x in v0,v1 and y in v2,v3). In the gfx1100 checks
; the two multiplies appear as one v_dual_mul_dx9_zero_f32 pair.
65define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
66; GFX6-LABEL: v_pow_v2f32:
67; GFX6:       ; %bb.0:
68; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
69; GFX6-NEXT:    v_log_f32_e32 v0, v0
70; GFX6-NEXT:    v_log_f32_e32 v1, v1
71; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
72; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
73; GFX6-NEXT:    v_exp_f32_e32 v0, v0
74; GFX6-NEXT:    v_exp_f32_e32 v1, v1
75; GFX6-NEXT:    s_setpc_b64 s[30:31]
76;
77; GFX8-LABEL: v_pow_v2f32:
78; GFX8:       ; %bb.0:
79; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
80; GFX8-NEXT:    v_log_f32_e32 v0, v0
81; GFX8-NEXT:    v_log_f32_e32 v1, v1
82; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
83; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
84; GFX8-NEXT:    v_exp_f32_e32 v0, v0
85; GFX8-NEXT:    v_exp_f32_e32 v1, v1
86; GFX8-NEXT:    s_setpc_b64 s[30:31]
87;
88; GFX9-LABEL: v_pow_v2f32:
89; GFX9:       ; %bb.0:
90; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91; GFX9-NEXT:    v_log_f32_e32 v0, v0
92; GFX9-NEXT:    v_log_f32_e32 v1, v1
93; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
94; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
95; GFX9-NEXT:    v_exp_f32_e32 v0, v0
96; GFX9-NEXT:    v_exp_f32_e32 v1, v1
97; GFX9-NEXT:    s_setpc_b64 s[30:31]
98;
99; GFX90A-LABEL: v_pow_v2f32:
100; GFX90A:       ; %bb.0:
101; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102; GFX90A-NEXT:    v_log_f32_e32 v0, v0
103; GFX90A-NEXT:    v_log_f32_e32 v1, v1
104; GFX90A-NEXT:    v_mul_legacy_f32 v0, v2, v0
105; GFX90A-NEXT:    v_mul_legacy_f32 v1, v3, v1
106; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
107; GFX90A-NEXT:    v_exp_f32_e32 v1, v1
108; GFX90A-NEXT:    s_setpc_b64 s[30:31]
109;
110; GFX10-LABEL: v_pow_v2f32:
111; GFX10:       ; %bb.0:
112; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
113; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
114; GFX10-NEXT:    v_log_f32_e32 v0, v0
115; GFX10-NEXT:    v_log_f32_e32 v1, v1
116; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
117; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
118; GFX10-NEXT:    v_exp_f32_e32 v0, v0
119; GFX10-NEXT:    v_exp_f32_e32 v1, v1
120; GFX10-NEXT:    s_setpc_b64 s[30:31]
121;
122; GFX11-LABEL: v_pow_v2f32:
123; GFX11:       ; %bb.0:
124; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
126; GFX11-NEXT:    v_log_f32_e32 v0, v0
127; GFX11-NEXT:    v_log_f32_e32 v1, v1
128; GFX11-NEXT:    s_waitcnt_depctr 0xfff
129; GFX11-NEXT:    v_dual_mul_dx9_zero_f32 v0, v2, v0 :: v_dual_mul_dx9_zero_f32 v1, v3, v1
130; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
131; GFX11-NEXT:    v_exp_f32_e32 v0, v0
132; GFX11-NEXT:    v_exp_f32_e32 v1, v1
133; GFX11-NEXT:    s_setpc_b64 s[30:31]
134  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
135  ret <2 x float> %pow
136}
137
; Scalar f16 case of the llvm.pow intrinsic.
; All expected sequences do the log / legacy-multiply / exp work in f32.
; - tahiti checks: both inputs go through v_cvt_f16_f32 then v_cvt_f32_f16
;   before the math, and there is no conversion after v_exp_f32.
; - all other RUN lines: inputs are widened with v_cvt_f32_f16 and the result
;   is narrowed with a trailing v_cvt_f16_f32.
138define half @v_pow_f16(half %x, half %y) {
139; GFX6-LABEL: v_pow_f16:
140; GFX6:       ; %bb.0:
141; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
142; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
143; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
144; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
145; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
146; GFX6-NEXT:    v_log_f32_e32 v0, v0
147; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
148; GFX6-NEXT:    v_exp_f32_e32 v0, v0
149; GFX6-NEXT:    s_setpc_b64 s[30:31]
150;
151; GFX8-LABEL: v_pow_f16:
152; GFX8:       ; %bb.0:
153; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
154; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
155; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
156; GFX8-NEXT:    v_log_f32_e32 v0, v0
157; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
158; GFX8-NEXT:    v_exp_f32_e32 v0, v0
159; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
160; GFX8-NEXT:    s_setpc_b64 s[30:31]
161;
162; GFX9-LABEL: v_pow_f16:
163; GFX9:       ; %bb.0:
164; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
165; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
166; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
167; GFX9-NEXT:    v_log_f32_e32 v0, v0
168; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
169; GFX9-NEXT:    v_exp_f32_e32 v0, v0
170; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
171; GFX9-NEXT:    s_setpc_b64 s[30:31]
172;
173; GFX90A-LABEL: v_pow_f16:
174; GFX90A:       ; %bb.0:
175; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
176; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
177; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
178; GFX90A-NEXT:    v_log_f32_e32 v0, v0
179; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
180; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
181; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
182; GFX90A-NEXT:    s_setpc_b64 s[30:31]
183;
184; GFX10-LABEL: v_pow_f16:
185; GFX10:       ; %bb.0:
186; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
187; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
188; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
189; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
190; GFX10-NEXT:    v_log_f32_e32 v0, v0
191; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
192; GFX10-NEXT:    v_exp_f32_e32 v0, v0
193; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
194; GFX10-NEXT:    s_setpc_b64 s[30:31]
195;
196; GFX11-LABEL: v_pow_f16:
197; GFX11:       ; %bb.0:
198; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
199; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
200; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
201; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
202; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
203; GFX11-NEXT:    v_log_f32_e32 v0, v0
204; GFX11-NEXT:    s_waitcnt_depctr 0xfff
205; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
206; GFX11-NEXT:    v_exp_f32_e32 v0, v0
207; GFX11-NEXT:    s_waitcnt_depctr 0xfff
208; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
209; GFX11-NEXT:    s_setpc_b64 s[30:31]
210  %pow = call half @llvm.pow.f16(half %x, half %y)
211  ret half %pow
212}
213
; <2 x half> case of the llvm.pow intrinsic; each element is computed in f32
; with the log / legacy-multiply / exp sequence.
; - tahiti checks: the four half values sit in separate registers v0..v3, are
;   round-tripped through v_cvt_f16_f32 / v_cvt_f32_f16, and the two results
;   are left unpacked in v0 and v1.
; - fiji / gfx900 / gfx90a / gfx1010 checks: the high half of each packed
;   input is read with an SDWA convert (src0_sel:WORD_1). fiji repacks with an
;   SDWA convert into WORD_1 plus v_or_b32; the others use v_pack_b32_f16.
; - gfx1100 checks: the high half is extracted with v_lshrrev_b32 by 16
;   rather than SDWA, and the result is repacked with v_pack_b32_f16.
214define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
215; GFX6-LABEL: v_pow_v2f16:
216; GFX6:       ; %bb.0:
217; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
219; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
220; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
221; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
222; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
223; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
224; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
225; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
226; GFX6-NEXT:    v_log_f32_e32 v0, v0
227; GFX6-NEXT:    v_log_f32_e32 v1, v1
228; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
229; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
230; GFX6-NEXT:    v_exp_f32_e32 v0, v0
231; GFX6-NEXT:    v_exp_f32_e32 v1, v1
232; GFX6-NEXT:    s_setpc_b64 s[30:31]
233;
234; GFX8-LABEL: v_pow_v2f16:
235; GFX8:       ; %bb.0:
236; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
237; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
238; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
239; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
240; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
241; GFX8-NEXT:    v_log_f32_e32 v2, v2
242; GFX8-NEXT:    v_log_f32_e32 v0, v0
243; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
244; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
245; GFX8-NEXT:    v_exp_f32_e32 v1, v2
246; GFX8-NEXT:    v_exp_f32_e32 v0, v0
247; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
248; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
249; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
250; GFX8-NEXT:    s_setpc_b64 s[30:31]
251;
252; GFX9-LABEL: v_pow_v2f16:
253; GFX9:       ; %bb.0:
254; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
255; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
256; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
257; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
258; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
259; GFX9-NEXT:    v_log_f32_e32 v2, v2
260; GFX9-NEXT:    v_log_f32_e32 v0, v0
261; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
262; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
263; GFX9-NEXT:    v_exp_f32_e32 v1, v2
264; GFX9-NEXT:    v_exp_f32_e32 v0, v0
265; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
266; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
267; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
268; GFX9-NEXT:    s_setpc_b64 s[30:31]
269;
270; GFX90A-LABEL: v_pow_v2f16:
271; GFX90A:       ; %bb.0:
272; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
274; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
275; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
276; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
277; GFX90A-NEXT:    v_log_f32_e32 v2, v2
278; GFX90A-NEXT:    v_log_f32_e32 v0, v0
279; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
280; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
281; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
282; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
283; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
284; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
285; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
286; GFX90A-NEXT:    s_setpc_b64 s[30:31]
287;
288; GFX10-LABEL: v_pow_v2f16:
289; GFX10:       ; %bb.0:
290; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
292; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
293; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
294; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
295; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
296; GFX10-NEXT:    v_log_f32_e32 v2, v2
297; GFX10-NEXT:    v_log_f32_e32 v0, v0
298; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
299; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
300; GFX10-NEXT:    v_exp_f32_e32 v1, v2
301; GFX10-NEXT:    v_exp_f32_e32 v0, v0
302; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
303; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
304; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
305; GFX10-NEXT:    s_setpc_b64 s[30:31]
306;
307; GFX11-LABEL: v_pow_v2f16:
308; GFX11:       ; %bb.0:
309; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
310; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
311; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
312; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
313; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
314; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
315; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
316; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
317; GFX11-NEXT:    v_log_f32_e32 v0, v0
318; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
319; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v3
320; GFX11-NEXT:    v_log_f32_e32 v2, v2
321; GFX11-NEXT:    s_waitcnt_depctr 0xfff
322; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
323; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
324; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
325; GFX11-NEXT:    v_exp_f32_e32 v0, v0
326; GFX11-NEXT:    v_exp_f32_e32 v1, v2
327; GFX11-NEXT:    s_waitcnt_depctr 0xfff
328; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
329; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
330; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
331; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
332; GFX11-NEXT:    s_setpc_b64 s[30:31]
333  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
334  ret <2 x half> %pow
335}
336
; <2 x half> pow where the base %x is negated (fneg) before the call.
; - tahiti checks: the two halves of x are packed (v_lshlrev_b32 by 16 plus
;   v_or_b32), both sign bits are flipped with v_xor_b32 0x80008000, and the
;   halves are then unpacked and widened to f32.
; - all other RUN lines: no separate negate instruction is expected; the
;   negation shows up as a -v0 / -v2 source operand on the f16->f32 converts.
; %y is converted without a negate in every expected sequence.
337define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
338; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
339; GFX6:       ; %bb.0:
340; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
341; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
342; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
343; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
344; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
345; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
346; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
347; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
348; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
349; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
350; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
351; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
352; GFX6-NEXT:    v_log_f32_e32 v3, v3
353; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
354; GFX6-NEXT:    v_log_f32_e32 v4, v0
355; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
356; GFX6-NEXT:    v_exp_f32_e32 v0, v0
357; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
358; GFX6-NEXT:    v_exp_f32_e32 v1, v1
359; GFX6-NEXT:    s_setpc_b64 s[30:31]
360;
361; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
362; GFX8:       ; %bb.0:
363; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
365; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
366; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
367; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
368; GFX8-NEXT:    v_log_f32_e32 v2, v2
369; GFX8-NEXT:    v_log_f32_e32 v0, v0
370; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
371; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
372; GFX8-NEXT:    v_exp_f32_e32 v1, v2
373; GFX8-NEXT:    v_exp_f32_e32 v0, v0
374; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
375; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
376; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
377; GFX8-NEXT:    s_setpc_b64 s[30:31]
378;
379; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
380; GFX9:       ; %bb.0:
381; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
383; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
384; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
385; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
386; GFX9-NEXT:    v_log_f32_e32 v2, v2
387; GFX9-NEXT:    v_log_f32_e32 v0, v0
388; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
389; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
390; GFX9-NEXT:    v_exp_f32_e32 v1, v2
391; GFX9-NEXT:    v_exp_f32_e32 v0, v0
392; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
393; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
394; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
395; GFX9-NEXT:    s_setpc_b64 s[30:31]
396;
397; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
398; GFX90A:       ; %bb.0:
399; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
400; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
401; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
402; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
403; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
404; GFX90A-NEXT:    v_log_f32_e32 v2, v2
405; GFX90A-NEXT:    v_log_f32_e32 v0, v0
406; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
407; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
408; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
409; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
410; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
411; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
412; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
413; GFX90A-NEXT:    s_setpc_b64 s[30:31]
414;
415; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
416; GFX10:       ; %bb.0:
417; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
418; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
419; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
420; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
421; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
422; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
423; GFX10-NEXT:    v_log_f32_e32 v2, v2
424; GFX10-NEXT:    v_log_f32_e32 v0, v0
425; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
426; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
427; GFX10-NEXT:    v_exp_f32_e32 v1, v2
428; GFX10-NEXT:    v_exp_f32_e32 v0, v0
429; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
430; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
431; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
432; GFX10-NEXT:    s_setpc_b64 s[30:31]
433;
434; GFX11-LABEL: v_pow_v2f16_fneg_lhs:
435; GFX11:       ; %bb.0:
436; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
437; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
438; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
439; GFX11-NEXT:    v_cvt_f32_f16_e64 v0, -v0
440; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
441; GFX11-NEXT:    v_cvt_f32_f16_e32 v1, v1
442; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
443; GFX11-NEXT:    v_cvt_f32_f16_e64 v2, -v2
444; GFX11-NEXT:    v_log_f32_e32 v0, v0
445; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
446; GFX11-NEXT:    v_cvt_f32_f16_e32 v3, v3
447; GFX11-NEXT:    v_log_f32_e32 v2, v2
448; GFX11-NEXT:    s_waitcnt_depctr 0xfff
449; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
450; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
451; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
452; GFX11-NEXT:    v_exp_f32_e32 v0, v0
453; GFX11-NEXT:    v_exp_f32_e32 v1, v2
454; GFX11-NEXT:    s_waitcnt_depctr 0xfff
455; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
456; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
457; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
458; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
459; GFX11-NEXT:    s_setpc_b64 s[30:31]
460  %x.fneg = fneg <2 x half> %x
461  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
462  ret <2 x half> %pow
463}
464
; <2 x half> pow where the exponent %y is negated (fneg) before the call.
; - tahiti checks: the two halves of y are packed (v_lshlrev_b32 by 16 plus
;   v_or_b32), both sign bits are flipped with v_xor_b32 0x80008000, and the
;   halves are then unpacked and widened to f32.
; - all other RUN lines: no separate negate instruction is expected; the
;   negation shows up as a -v1 / -v3 source operand on the f16->f32 converts.
; %x is converted without a negate in every expected sequence.
465define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
466; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
467; GFX6:       ; %bb.0:
468; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
469; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
470; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
471; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
472; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
473; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
474; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
475; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
476; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
477; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
478; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
479; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
480; GFX6-NEXT:    v_log_f32_e32 v0, v0
481; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
482; GFX6-NEXT:    v_log_f32_e32 v1, v1
483; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
484; GFX6-NEXT:    v_exp_f32_e32 v0, v0
485; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
486; GFX6-NEXT:    v_exp_f32_e32 v1, v1
487; GFX6-NEXT:    s_setpc_b64 s[30:31]
488;
489; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
490; GFX8:       ; %bb.0:
491; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
492; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
493; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
494; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
495; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
496; GFX8-NEXT:    v_log_f32_e32 v2, v2
497; GFX8-NEXT:    v_log_f32_e32 v0, v0
498; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
499; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
500; GFX8-NEXT:    v_exp_f32_e32 v1, v2
501; GFX8-NEXT:    v_exp_f32_e32 v0, v0
502; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
503; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
504; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
505; GFX8-NEXT:    s_setpc_b64 s[30:31]
506;
507; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
508; GFX9:       ; %bb.0:
509; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
510; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
511; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
512; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
513; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
514; GFX9-NEXT:    v_log_f32_e32 v2, v2
515; GFX9-NEXT:    v_log_f32_e32 v0, v0
516; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
517; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
518; GFX9-NEXT:    v_exp_f32_e32 v1, v2
519; GFX9-NEXT:    v_exp_f32_e32 v0, v0
520; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
521; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
522; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
523; GFX9-NEXT:    s_setpc_b64 s[30:31]
524;
525; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
526; GFX90A:       ; %bb.0:
527; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
529; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
530; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
531; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
532; GFX90A-NEXT:    v_log_f32_e32 v2, v2
533; GFX90A-NEXT:    v_log_f32_e32 v0, v0
534; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
535; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
536; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
537; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
538; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
539; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
540; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
541; GFX90A-NEXT:    s_setpc_b64 s[30:31]
542;
543; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
544; GFX10:       ; %bb.0:
545; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
546; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
547; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
548; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
549; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
550; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
551; GFX10-NEXT:    v_log_f32_e32 v2, v2
552; GFX10-NEXT:    v_log_f32_e32 v0, v0
553; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
554; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
555; GFX10-NEXT:    v_exp_f32_e32 v1, v2
556; GFX10-NEXT:    v_exp_f32_e32 v0, v0
557; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
558; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
559; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
560; GFX10-NEXT:    s_setpc_b64 s[30:31]
561;
562; GFX11-LABEL: v_pow_v2f16_fneg_rhs:
563; GFX11:       ; %bb.0:
564; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
566; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
567; GFX11-NEXT:    v_cvt_f32_f16_e32 v0, v0
568; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
569; GFX11-NEXT:    v_cvt_f32_f16_e64 v1, -v1
570; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
571; GFX11-NEXT:    v_cvt_f32_f16_e32 v2, v2
572; GFX11-NEXT:    v_log_f32_e32 v0, v0
573; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
574; GFX11-NEXT:    v_cvt_f32_f16_e64 v3, -v3
575; GFX11-NEXT:    v_log_f32_e32 v2, v2
576; GFX11-NEXT:    s_waitcnt_depctr 0xfff
577; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
578; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
579; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
580; GFX11-NEXT:    v_exp_f32_e32 v0, v0
581; GFX11-NEXT:    v_exp_f32_e32 v1, v2
582; GFX11-NEXT:    s_waitcnt_depctr 0xfff
583; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
584; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
585; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
586; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
587; GFX11-NEXT:    s_setpc_b64 s[30:31]
588  %y.fneg = fneg <2 x half> %y
589  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
590  ret <2 x half> %pow
591}
592
; <2 x half> pow where both the base %x and the exponent %y are negated
; (fneg) before the call.
; - tahiti checks: x and y are each packed (v_lshlrev_b32 by 16 plus
;   v_or_b32), sign-flipped with v_xor_b32 0x80008000, then unpacked and
;   widened to f32.
; - all other RUN lines: no separate negate instruction is expected; every
;   f16->f32 convert carries a negated source operand (-v0, -v1, -v2, -v3).
593define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
594; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
595; GFX6:       ; %bb.0:
596; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
598; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
599; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
600; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
601; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
602; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
603; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
604; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
605; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
606; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
607; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
608; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
609; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
610; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
611; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
612; GFX6-NEXT:    v_log_f32_e32 v0, v0
613; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
614; GFX6-NEXT:    v_log_f32_e32 v1, v1
615; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
616; GFX6-NEXT:    v_exp_f32_e32 v0, v0
617; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
618; GFX6-NEXT:    v_exp_f32_e32 v1, v1
619; GFX6-NEXT:    s_setpc_b64 s[30:31]
620;
621; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
622; GFX8:       ; %bb.0:
623; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
624; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
625; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
626; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
627; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
628; GFX8-NEXT:    v_log_f32_e32 v2, v2
629; GFX8-NEXT:    v_log_f32_e32 v0, v0
630; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
631; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
632; GFX8-NEXT:    v_exp_f32_e32 v1, v2
633; GFX8-NEXT:    v_exp_f32_e32 v0, v0
634; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
635; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
636; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
637; GFX8-NEXT:    s_setpc_b64 s[30:31]
638;
639; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
640; GFX9:       ; %bb.0:
641; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
643; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
644; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
645; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
646; GFX9-NEXT:    v_log_f32_e32 v2, v2
647; GFX9-NEXT:    v_log_f32_e32 v0, v0
648; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
649; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
650; GFX9-NEXT:    v_exp_f32_e32 v1, v2
651; GFX9-NEXT:    v_exp_f32_e32 v0, v0
652; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
653; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
654; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
655; GFX9-NEXT:    s_setpc_b64 s[30:31]
656;
657; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
658; GFX90A:       ; %bb.0:
659; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
660; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
661; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
662; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
663; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
664; GFX90A-NEXT:    v_log_f32_e32 v2, v2
665; GFX90A-NEXT:    v_log_f32_e32 v0, v0
666; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
667; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
668; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
669; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
670; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
671; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
672; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
673; GFX90A-NEXT:    s_setpc_b64 s[30:31]
674;
675; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
676; GFX10:       ; %bb.0:
677; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
678; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
679; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
680; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
681; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
682; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
683; GFX10-NEXT:    v_log_f32_e32 v2, v2
684; GFX10-NEXT:    v_log_f32_e32 v0, v0
685; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
686; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
687; GFX10-NEXT:    v_exp_f32_e32 v1, v2
688; GFX10-NEXT:    v_exp_f32_e32 v0, v0
689; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
690; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
691; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
692; GFX10-NEXT:    s_setpc_b64 s[30:31]
693;
694; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs:
695; GFX11:       ; %bb.0:
696; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
697; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
698; GFX11-NEXT:    v_lshrrev_b32_e32 v2, 16, v0
699; GFX11-NEXT:    v_cvt_f32_f16_e64 v0, -v0
700; GFX11-NEXT:    v_lshrrev_b32_e32 v3, 16, v1
701; GFX11-NEXT:    v_cvt_f32_f16_e64 v1, -v1
702; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
703; GFX11-NEXT:    v_cvt_f32_f16_e64 v2, -v2
704; GFX11-NEXT:    v_log_f32_e32 v0, v0
705; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
706; GFX11-NEXT:    v_cvt_f32_f16_e64 v3, -v3
707; GFX11-NEXT:    v_log_f32_e32 v2, v2
708; GFX11-NEXT:    s_waitcnt_depctr 0xfff
709; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
710; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v2, v3, v2
711; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
712; GFX11-NEXT:    v_exp_f32_e32 v0, v0
713; GFX11-NEXT:    v_exp_f32_e32 v1, v2
714; GFX11-NEXT:    s_waitcnt_depctr 0xfff
715; GFX11-NEXT:    v_cvt_f16_f32_e32 v0, v0
716; GFX11-NEXT:    v_cvt_f16_f32_e32 v1, v1
717; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
718; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
719; GFX11-NEXT:    s_setpc_b64 s[30:31]
720  %x.fneg = fneg <2 x half> %x
721  %y.fneg = fneg <2 x half> %y
722  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
723  ret <2 x half> %pow
724}
725
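; FIXME: The f64 variant below is disabled (presumably llvm.pow.f64 cannot be
; selected for AMDGPU yet -- confirm). Re-enable it and regenerate the checks
; once it is supported.
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }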
731
; f32 pow with fabs applied to the base only: pow(fabs(x), y).
; The checks expect the call to be expanded inline to a
; v_log_f32 -> v_mul_legacy_f32 -> v_exp_f32 sequence (the multiply is
; v_mul_dx9_zero_f32 on the gfx1100 run), with the fabs folded into the
; v_log_f32 as a |v0| source modifier instead of a separate instruction.
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_log_f32_e64 v0, |v0|
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v1, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
  ret float %pow
}
788
; f32 pow with fabs applied to the exponent only: pow(x, fabs(y)).
; The checks expect the inline v_log_f32 -> multiply -> v_exp_f32 expansion,
; with the fabs folded into the multiply as a |v1| source modifier. That
; forces the VOP3 (_e64) encoding of v_mul_legacy_f32 / v_mul_dx9_zero_f32
; on the runs that print an encoding suffix.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e64 v0, |v1|, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
  ret float %pow
}
845
; f32 pow with fabs applied to both operands: pow(fabs(x), fabs(y)).
; The checks expect both fabs calls to disappear into source modifiers:
; |v0| on the v_log_f32 and |v1| on the following multiply, so the expansion
; stays at three VALU instructions (log, multiply, exp).
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX11-NEXT:    v_log_f32_e64 v0, |v0|
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e64 v0, |v1|, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
  ret float %pow
}
903
; f32 pow in an amdgpu_ps shader with the base passed inreg (SGPR s0) and the
; exponent in a VGPR (v0). The checks expect v_log_f32 to read s0 directly
; and write a fresh VGPR (v1) so the exponent in v0 stays live for the
; multiply; no copy of s0 into a VGPR is emitted first.
define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v1, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v1, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v1, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v1, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v0, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v1, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_sgpr_vgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_log_f32_e32 v1, s0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, v0, v1
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}
951
; f32 pow in an amdgpu_ps shader with the base in a VGPR (v0) and the
; exponent passed inreg (SGPR s0). The checks expect the log to be computed
; in place in v0 and the multiply to take s0 directly as its first source
; operand, again without an SGPR-to-VGPR copy.
define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_vgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_vgpr_sgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_log_f32_e32 v0, v0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, s0, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}
999
; f32 pow in an amdgpu_ps shader with both operands passed inreg (base in s0,
; exponent in s1). The checks expect v_log_f32 to read s0 and the multiply to
; read s1 directly; each instruction uses a single SGPR source, so no copies
; into VGPRs are emitted and the result lands in v0.
define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: v_pow_f32_sgpr_sgpr:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_log_f32_e32 v0, s0
; GFX11-NEXT:    s_waitcnt_depctr 0xfff
; GFX11-NEXT:    v_mul_dx9_zero_f32_e32 v0, s1, v0
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_exp_f32_e32 v0, v0
; GFX11-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}
1047
; Intrinsic declarations for the tests in this file.
; Scalar pow; the f64 form is presumably kept for the disabled v_pow_f64 test.
declare half @llvm.pow.f16(half, half)
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)

; fabs, used to exercise source-modifier folding on the pow operands.
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)

; Vector pow.
declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
1057