; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Scalar f32 pow with both operands in VGPRs. Every checked target expands
; the intrinsic inline as v_log_f32 -> v_mul_legacy_f32 -> v_exp_f32.
; The gfx90a run prints the multiply without an _e32 suffix, and the gfx1010
; run emits an additional s_waitcnt_vscnt at function entry.
define float @v_pow_f32(float %x, float %y) {
; GFX6-LABEL: v_pow_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; <2 x float> pow. The checks show the vector being handled one lane at a
; time: two v_log_f32, two v_mul_legacy_f32 and two v_exp_f32 instructions,
; with lane 0 in v0/v2 and lane 1 in v1/v3.
define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6-LABEL: v_pow_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_log_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_log_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_log_f32_e32 v1, v1
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v2, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v1, v3, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_log_f32_e32 v1, v1
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %pow
}

; Scalar f16 pow. On every target the log/mul/exp sequence itself is done in
; f32. The tahiti checks round each input through v_cvt_f16_f32 followed by
; v_cvt_f32_f16 and show no conversion after the v_exp_f32. The other targets
; widen both inputs with v_cvt_f32_f16 and narrow the result with a final
; v_cvt_f16_f32.
define half @v_pow_f16(half %x, half %y) {
; GFX6-LABEL: v_pow_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call half @llvm.pow.f16(half %x, half %y)
  ret half %pow
}

; <2 x half> pow, computed per lane in f32 on every target.
;  - tahiti: each half element occupies its own VGPR (v0..v3) and is rounded
;    through v_cvt_f16_f32 / v_cvt_f32_f16 before the log/mul/exp sequence.
;  - fiji: the high lane is extracted with SDWA (src0_sel:WORD_1) and the
;    result is repacked with an SDWA v_cvt_f16_f32 plus v_or_b32.
;  - gfx900, gfx90a, gfx1010: same SDWA extraction, but the two f16 results
;    are combined with v_pack_b32_f16.
define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with an fneg applied to the base (first operand).
; On fiji and newer the negation appears as a source modifier (-v0) on the
; v_cvt_f32_f16 instructions that widen the base. On tahiti the two base
; halves are packed into one register and negated together with a v_xor_b32
; against 0x80008000 before being unpacked and widened.
define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v4, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with an fneg applied to the exponent (second operand).
; On fiji and newer the negation appears as a source modifier (-v1) on the
; v_cvt_f32_f16 instructions that widen the exponent. On tahiti the two
; exponent halves are packed into one register and negated together with a
; v_xor_b32 against 0x80008000 before being unpacked and widened.
define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; <2 x half> pow with fneg applied to both the base and the exponent.
; On fiji and newer both negations appear as source modifiers (-v0 and -v1)
; on the widening v_cvt_f32_f16 instructions. On tahiti the 0x80008000 sign
; mask is materialized once in s4 and reused by the two v_xor_b32
; instructions that negate the packed base and the packed exponent.
define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    s_mov_b32 s4, 0x80008000
; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; FIXME: The f64 pow test below is currently disabled (kept commented out).
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }

; f32 pow with fabs applied to the base. On every checked target the fabs
; appears as an |v0| source modifier on v_log_f32 (e64 encoding) rather than
; as a separate instruction.
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
  ret float %pow
}

; f32 pow with fabs applied to the exponent. On every checked target the
; fabs appears as an |v1| source modifier on v_mul_legacy_f32 rather than as
; a separate instruction. The gfx90a run prints that multiply without an
; encoding suffix.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
  ret float %pow
}

; f32 pow with fabs applied to both operands. The checks show both fabs
; operations as source modifiers: |v0| on v_log_f32 and |v1| on
; v_mul_legacy_f32, with no separate fabs instruction on any target.
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
  ret float %pow
}

; amdgpu_ps variant with the base passed inreg and the exponent passed
; normally. The checks show the base being read directly from s0 by
; v_log_f32 into v1, which is then multiplied with the exponent in v0.
; These shader checks contain no entry s_waitcnt or s_setpc_b64; they end
; with the "return to shader part epilog" marker.
define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v1, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v1, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v1, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v1, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v0, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v1, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with the base passed normally and the exponent passed
; inreg. The checks show v_log_f32 operating on v0 and the exponent being
; read directly from s0 as the first source of v_mul_legacy_f32, with no
; copy of s0 into a VGPR.
define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_vgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with both operands passed inreg. The checks show
; v_log_f32 reading the base from s0 into v0, and v_mul_legacy_f32 then
; reading the exponent from s1, so each VALU instruction uses a single SGPR
; source.
define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; Intrinsic declarations used by the tests in this file.
; Scalar pow. The f64 form is only referenced by the disabled (commented
; out) v_pow_f64 test.
declare half @llvm.pow.f16(half, half)
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)

; fabs, used to exercise |x| source modifiers. The f16 form is declared but
; not called by any function in this file.
declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)

; Vector pow.
declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
