1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
3; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
4; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
5; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
6; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
7
; Baseline case: scalar f32 llvm.pow with both operands passed as ordinary
; function arguments. On every checked target the expected code is
; v_log_f32 of %x, v_mul_legacy_f32 of %y with that log, then v_exp_f32.
; The gfx90a output prints the multiply without an _e32 suffix, and the
; gfx1010 output carries an extra s_waitcnt_vscnt at function entry.
8define float @v_pow_f32(float %x, float %y) {
9; GFX6-LABEL: v_pow_f32:
10; GFX6:       ; %bb.0:
11; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12; GFX6-NEXT:    v_log_f32_e32 v0, v0
13; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
14; GFX6-NEXT:    v_exp_f32_e32 v0, v0
15; GFX6-NEXT:    s_setpc_b64 s[30:31]
16;
17; GFX8-LABEL: v_pow_f32:
18; GFX8:       ; %bb.0:
19; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
20; GFX8-NEXT:    v_log_f32_e32 v0, v0
21; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
22; GFX8-NEXT:    v_exp_f32_e32 v0, v0
23; GFX8-NEXT:    s_setpc_b64 s[30:31]
24;
25; GFX9-LABEL: v_pow_f32:
26; GFX9:       ; %bb.0:
27; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
28; GFX9-NEXT:    v_log_f32_e32 v0, v0
29; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
30; GFX9-NEXT:    v_exp_f32_e32 v0, v0
31; GFX9-NEXT:    s_setpc_b64 s[30:31]
32;
33; GFX90A-LABEL: v_pow_f32:
34; GFX90A:       ; %bb.0:
35; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
36; GFX90A-NEXT:    v_log_f32_e32 v0, v0
37; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
38; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
39; GFX90A-NEXT:    s_setpc_b64 s[30:31]
40;
41; GFX10-LABEL: v_pow_f32:
42; GFX10:       ; %bb.0:
43; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
45; GFX10-NEXT:    v_log_f32_e32 v0, v0
46; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
47; GFX10-NEXT:    v_exp_f32_e32 v0, v0
48; GFX10-NEXT:    s_setpc_b64 s[30:31]
49  %pow = call float @llvm.pow.f32(float %x, float %y)
50  ret float %pow
51}
52
; Two-element f32 vector llvm.pow. The expected output handles each element
; independently with the same log / legacy-multiply / exp sequence: the
; elements of %x arrive in v0 and v1, the elements of %y in v2 and v3, and
; the results are returned in v0 and v1.
53define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
54; GFX6-LABEL: v_pow_v2f32:
55; GFX6:       ; %bb.0:
56; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
57; GFX6-NEXT:    v_log_f32_e32 v0, v0
58; GFX6-NEXT:    v_log_f32_e32 v1, v1
59; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
60; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
61; GFX6-NEXT:    v_exp_f32_e32 v0, v0
62; GFX6-NEXT:    v_exp_f32_e32 v1, v1
63; GFX6-NEXT:    s_setpc_b64 s[30:31]
64;
65; GFX8-LABEL: v_pow_v2f32:
66; GFX8:       ; %bb.0:
67; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68; GFX8-NEXT:    v_log_f32_e32 v0, v0
69; GFX8-NEXT:    v_log_f32_e32 v1, v1
70; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
71; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
72; GFX8-NEXT:    v_exp_f32_e32 v0, v0
73; GFX8-NEXT:    v_exp_f32_e32 v1, v1
74; GFX8-NEXT:    s_setpc_b64 s[30:31]
75;
76; GFX9-LABEL: v_pow_v2f32:
77; GFX9:       ; %bb.0:
78; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79; GFX9-NEXT:    v_log_f32_e32 v0, v0
80; GFX9-NEXT:    v_log_f32_e32 v1, v1
81; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
82; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
83; GFX9-NEXT:    v_exp_f32_e32 v0, v0
84; GFX9-NEXT:    v_exp_f32_e32 v1, v1
85; GFX9-NEXT:    s_setpc_b64 s[30:31]
86;
87; GFX90A-LABEL: v_pow_v2f32:
88; GFX90A:       ; %bb.0:
89; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
90; GFX90A-NEXT:    v_log_f32_e32 v0, v0
91; GFX90A-NEXT:    v_log_f32_e32 v1, v1
92; GFX90A-NEXT:    v_mul_legacy_f32 v0, v2, v0
93; GFX90A-NEXT:    v_mul_legacy_f32 v1, v3, v1
94; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
95; GFX90A-NEXT:    v_exp_f32_e32 v1, v1
96; GFX90A-NEXT:    s_setpc_b64 s[30:31]
97;
98; GFX10-LABEL: v_pow_v2f32:
99; GFX10:       ; %bb.0:
100; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
101; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
102; GFX10-NEXT:    v_log_f32_e32 v0, v0
103; GFX10-NEXT:    v_log_f32_e32 v1, v1
104; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
105; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
106; GFX10-NEXT:    v_exp_f32_e32 v0, v0
107; GFX10-NEXT:    v_exp_f32_e32 v1, v1
108; GFX10-NEXT:    s_setpc_b64 s[30:31]
109  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
110  ret <2 x float> %pow
111}
112
; Scalar f16 llvm.pow. The expected code performs the log / legacy-multiply /
; exp sequence with f32 instructions on every target.
;  - tahiti: both arguments go through v_cvt_f16_f32 followed by
;    v_cvt_f32_f16 first, and no conversion follows the final v_exp_f32.
;  - fiji and newer: both inputs are widened with v_cvt_f32_f16 and the
;    result is narrowed with v_cvt_f16_f32 before returning.
113define half @v_pow_f16(half %x, half %y) {
114; GFX6-LABEL: v_pow_f16:
115; GFX6:       ; %bb.0:
116; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
117; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
118; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
119; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
120; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
121; GFX6-NEXT:    v_log_f32_e32 v0, v0
122; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
123; GFX6-NEXT:    v_exp_f32_e32 v0, v0
124; GFX6-NEXT:    s_setpc_b64 s[30:31]
125;
126; GFX8-LABEL: v_pow_f16:
127; GFX8:       ; %bb.0:
128; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
129; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
130; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
131; GFX8-NEXT:    v_log_f32_e32 v0, v0
132; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
133; GFX8-NEXT:    v_exp_f32_e32 v0, v0
134; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
135; GFX8-NEXT:    s_setpc_b64 s[30:31]
136;
137; GFX9-LABEL: v_pow_f16:
138; GFX9:       ; %bb.0:
139; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
141; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
142; GFX9-NEXT:    v_log_f32_e32 v0, v0
143; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
144; GFX9-NEXT:    v_exp_f32_e32 v0, v0
145; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
146; GFX9-NEXT:    s_setpc_b64 s[30:31]
147;
148; GFX90A-LABEL: v_pow_f16:
149; GFX90A:       ; %bb.0:
150; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
151; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
152; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
153; GFX90A-NEXT:    v_log_f32_e32 v0, v0
154; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
155; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
156; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
157; GFX90A-NEXT:    s_setpc_b64 s[30:31]
158;
159; GFX10-LABEL: v_pow_f16:
160; GFX10:       ; %bb.0:
161; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
162; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
163; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
164; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
165; GFX10-NEXT:    v_log_f32_e32 v0, v0
166; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
167; GFX10-NEXT:    v_exp_f32_e32 v0, v0
168; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
169; GFX10-NEXT:    s_setpc_b64 s[30:31]
170  %pow = call half @llvm.pow.f16(half %x, half %y)
171  ret half %pow
172}
173
; Two-element f16 vector llvm.pow with unmodified operands; this is the
; reference output for the fneg variants that follow.
;  - tahiti: the four elements arrive in v0-v3, each goes through
;    v_cvt_f16_f32 / v_cvt_f32_f16, and the two results stay in v0 and v1.
;  - fiji: the high halves are read with SDWA src0_sel:WORD_1, the high result
;    is written with SDWA dst_sel:WORD_1, and the halves are merged by v_or_b32.
;  - gfx900, gfx90a, gfx1010: same SDWA reads, but the two f16 results are
;    combined with v_pack_b32_f16.
174define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
175; GFX6-LABEL: v_pow_v2f16:
176; GFX6:       ; %bb.0:
177; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
179; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
180; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
181; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
182; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
183; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
184; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
185; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
186; GFX6-NEXT:    v_log_f32_e32 v0, v0
187; GFX6-NEXT:    v_log_f32_e32 v1, v1
188; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
189; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
190; GFX6-NEXT:    v_exp_f32_e32 v0, v0
191; GFX6-NEXT:    v_exp_f32_e32 v1, v1
192; GFX6-NEXT:    s_setpc_b64 s[30:31]
193;
194; GFX8-LABEL: v_pow_v2f16:
195; GFX8:       ; %bb.0:
196; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
198; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
199; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
200; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
201; GFX8-NEXT:    v_log_f32_e32 v2, v2
202; GFX8-NEXT:    v_log_f32_e32 v0, v0
203; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
204; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
205; GFX8-NEXT:    v_exp_f32_e32 v1, v2
206; GFX8-NEXT:    v_exp_f32_e32 v0, v0
207; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
208; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
209; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
210; GFX8-NEXT:    s_setpc_b64 s[30:31]
211;
212; GFX9-LABEL: v_pow_v2f16:
213; GFX9:       ; %bb.0:
214; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
215; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
216; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
217; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
218; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
219; GFX9-NEXT:    v_log_f32_e32 v2, v2
220; GFX9-NEXT:    v_log_f32_e32 v0, v0
221; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
222; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
223; GFX9-NEXT:    v_exp_f32_e32 v1, v2
224; GFX9-NEXT:    v_exp_f32_e32 v0, v0
225; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
226; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
227; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
228; GFX9-NEXT:    s_setpc_b64 s[30:31]
229;
230; GFX90A-LABEL: v_pow_v2f16:
231; GFX90A:       ; %bb.0:
232; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
233; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
234; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
235; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
236; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
237; GFX90A-NEXT:    v_log_f32_e32 v2, v2
238; GFX90A-NEXT:    v_log_f32_e32 v0, v0
239; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
240; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
241; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
242; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
243; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
244; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
245; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
246; GFX90A-NEXT:    s_setpc_b64 s[30:31]
247;
248; GFX10-LABEL: v_pow_v2f16:
249; GFX10:       ; %bb.0:
250; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
252; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
253; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
254; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
255; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
256; GFX10-NEXT:    v_log_f32_e32 v2, v2
257; GFX10-NEXT:    v_log_f32_e32 v0, v0
258; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
259; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
260; GFX10-NEXT:    v_exp_f32_e32 v1, v2
261; GFX10-NEXT:    v_exp_f32_e32 v0, v0
262; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
263; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
264; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
265; GFX10-NEXT:    s_setpc_b64 s[30:31]
266  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
267  ret <2 x half> %pow
268}
269
; <2 x half> llvm.pow where the base %x is negated with fneg before the call.
;  - tahiti: the two halves of %x are packed with a shift and an or, both
;    sign bits are flipped by v_xor_b32 with 0x80008000, and the halves are
;    then unpacked and widened again.
;  - fiji and newer: the negation appears as a -v0 source modifier on both
;    v_cvt_f32_f16 conversions of %x; no separate xor is emitted.
270define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
271; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
272; GFX6:       ; %bb.0:
273; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
275; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
276; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
277; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
278; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
279; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
280; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
281; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
282; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
283; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
284; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
285; GFX6-NEXT:    v_log_f32_e32 v3, v3
286; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
287; GFX6-NEXT:    v_log_f32_e32 v4, v0
288; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
289; GFX6-NEXT:    v_exp_f32_e32 v0, v0
290; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
291; GFX6-NEXT:    v_exp_f32_e32 v1, v1
292; GFX6-NEXT:    s_setpc_b64 s[30:31]
293;
294; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
295; GFX8:       ; %bb.0:
296; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
297; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
298; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
299; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
300; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
301; GFX8-NEXT:    v_log_f32_e32 v2, v2
302; GFX8-NEXT:    v_log_f32_e32 v0, v0
303; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
304; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
305; GFX8-NEXT:    v_exp_f32_e32 v1, v2
306; GFX8-NEXT:    v_exp_f32_e32 v0, v0
307; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
308; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
309; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
310; GFX8-NEXT:    s_setpc_b64 s[30:31]
311;
312; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
313; GFX9:       ; %bb.0:
314; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
315; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
316; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
317; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
318; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
319; GFX9-NEXT:    v_log_f32_e32 v2, v2
320; GFX9-NEXT:    v_log_f32_e32 v0, v0
321; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
322; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
323; GFX9-NEXT:    v_exp_f32_e32 v1, v2
324; GFX9-NEXT:    v_exp_f32_e32 v0, v0
325; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
326; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
327; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
328; GFX9-NEXT:    s_setpc_b64 s[30:31]
329;
330; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
331; GFX90A:       ; %bb.0:
332; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
334; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
335; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
336; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
337; GFX90A-NEXT:    v_log_f32_e32 v2, v2
338; GFX90A-NEXT:    v_log_f32_e32 v0, v0
339; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
340; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
341; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
342; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
343; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
344; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
345; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
346; GFX90A-NEXT:    s_setpc_b64 s[30:31]
347;
348; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
349; GFX10:       ; %bb.0:
350; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
352; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
353; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
354; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
355; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
356; GFX10-NEXT:    v_log_f32_e32 v2, v2
357; GFX10-NEXT:    v_log_f32_e32 v0, v0
358; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
359; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
360; GFX10-NEXT:    v_exp_f32_e32 v1, v2
361; GFX10-NEXT:    v_exp_f32_e32 v0, v0
362; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
363; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
364; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
365; GFX10-NEXT:    s_setpc_b64 s[30:31]
366  %x.fneg = fneg <2 x half> %x
367  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
368  ret <2 x half> %pow
369}
370
; <2 x half> llvm.pow where the exponent %y is negated with fneg before the
; call.
;  - tahiti: the two halves of %y (v2, v3) are packed, sign-flipped with
;    v_xor_b32 0x80008000, then unpacked and widened before the multiplies.
;  - fiji and newer: the negation appears as a -v1 source modifier on both
;    v_cvt_f32_f16 conversions of %y; the conversions of %x are unmodified.
371define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
372; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
373; GFX6:       ; %bb.0:
374; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
375; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
376; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
377; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
378; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
379; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
380; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
381; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
382; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
383; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
384; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
385; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
386; GFX6-NEXT:    v_log_f32_e32 v0, v0
387; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
388; GFX6-NEXT:    v_log_f32_e32 v1, v1
389; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
390; GFX6-NEXT:    v_exp_f32_e32 v0, v0
391; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
392; GFX6-NEXT:    v_exp_f32_e32 v1, v1
393; GFX6-NEXT:    s_setpc_b64 s[30:31]
394;
395; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
396; GFX8:       ; %bb.0:
397; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
398; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
399; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
400; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
401; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
402; GFX8-NEXT:    v_log_f32_e32 v2, v2
403; GFX8-NEXT:    v_log_f32_e32 v0, v0
404; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
405; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
406; GFX8-NEXT:    v_exp_f32_e32 v1, v2
407; GFX8-NEXT:    v_exp_f32_e32 v0, v0
408; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
409; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
410; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
411; GFX8-NEXT:    s_setpc_b64 s[30:31]
412;
413; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
414; GFX9:       ; %bb.0:
415; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
416; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
417; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
418; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
419; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
420; GFX9-NEXT:    v_log_f32_e32 v2, v2
421; GFX9-NEXT:    v_log_f32_e32 v0, v0
422; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
423; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
424; GFX9-NEXT:    v_exp_f32_e32 v1, v2
425; GFX9-NEXT:    v_exp_f32_e32 v0, v0
426; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
427; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
428; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
429; GFX9-NEXT:    s_setpc_b64 s[30:31]
430;
431; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
432; GFX90A:       ; %bb.0:
433; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
434; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
435; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
436; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
437; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
438; GFX90A-NEXT:    v_log_f32_e32 v2, v2
439; GFX90A-NEXT:    v_log_f32_e32 v0, v0
440; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
441; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
442; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
443; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
444; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
445; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
446; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
447; GFX90A-NEXT:    s_setpc_b64 s[30:31]
448;
449; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
450; GFX10:       ; %bb.0:
451; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
452; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
453; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
454; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
455; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
456; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
457; GFX10-NEXT:    v_log_f32_e32 v2, v2
458; GFX10-NEXT:    v_log_f32_e32 v0, v0
459; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
460; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
461; GFX10-NEXT:    v_exp_f32_e32 v1, v2
462; GFX10-NEXT:    v_exp_f32_e32 v0, v0
463; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
464; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
465; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
466; GFX10-NEXT:    s_setpc_b64 s[30:31]
467  %y.fneg = fneg <2 x half> %y
468  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
469  ret <2 x half> %pow
470}
471
; <2 x half> llvm.pow where both the base %x and the exponent %y are negated
; with fneg before the call.
;  - tahiti: each operand is packed, sign-flipped with its own v_xor_b32
;    0x80008000, and unpacked again (two xor instructions in total).
;  - fiji and newer: all four v_cvt_f32_f16 conversions carry a negated
;    source (-v0 for %x, -v1 for %y).
472define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
473; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
474; GFX6:       ; %bb.0:
475; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
476; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
477; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
478; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
479; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
480; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
481; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
482; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
483; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
484; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
485; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
486; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
487; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
488; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
489; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
490; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
491; GFX6-NEXT:    v_log_f32_e32 v0, v0
492; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
493; GFX6-NEXT:    v_log_f32_e32 v1, v1
494; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
495; GFX6-NEXT:    v_exp_f32_e32 v0, v0
496; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
497; GFX6-NEXT:    v_exp_f32_e32 v1, v1
498; GFX6-NEXT:    s_setpc_b64 s[30:31]
499;
500; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
501; GFX8:       ; %bb.0:
502; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
503; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
504; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
505; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
506; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
507; GFX8-NEXT:    v_log_f32_e32 v2, v2
508; GFX8-NEXT:    v_log_f32_e32 v0, v0
509; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
510; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
511; GFX8-NEXT:    v_exp_f32_e32 v1, v2
512; GFX8-NEXT:    v_exp_f32_e32 v0, v0
513; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
514; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
515; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
516; GFX8-NEXT:    s_setpc_b64 s[30:31]
517;
518; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
519; GFX9:       ; %bb.0:
520; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
521; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
522; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
523; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
524; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
525; GFX9-NEXT:    v_log_f32_e32 v2, v2
526; GFX9-NEXT:    v_log_f32_e32 v0, v0
527; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
528; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
529; GFX9-NEXT:    v_exp_f32_e32 v1, v2
530; GFX9-NEXT:    v_exp_f32_e32 v0, v0
531; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
532; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
533; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
534; GFX9-NEXT:    s_setpc_b64 s[30:31]
535;
536; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
537; GFX90A:       ; %bb.0:
538; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
539; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
540; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
541; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
542; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
543; GFX90A-NEXT:    v_log_f32_e32 v2, v2
544; GFX90A-NEXT:    v_log_f32_e32 v0, v0
545; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
546; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
547; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
548; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
549; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
550; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
551; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
552; GFX90A-NEXT:    s_setpc_b64 s[30:31]
553;
554; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
555; GFX10:       ; %bb.0:
556; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
557; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
558; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
559; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
560; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
561; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
562; GFX10-NEXT:    v_log_f32_e32 v2, v2
563; GFX10-NEXT:    v_log_f32_e32 v0, v0
564; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
565; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
566; GFX10-NEXT:    v_exp_f32_e32 v1, v2
567; GFX10-NEXT:    v_exp_f32_e32 v0, v0
568; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
569; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
570; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
571; GFX10-NEXT:    s_setpc_b64 s[30:31]
572  %x.fneg = fneg <2 x half> %x
573  %y.fneg = fneg <2 x half> %y
574  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
575  ret <2 x half> %pow
576}
577
578; FIXME: The f64 variant below is commented out, so llvm.pow.f64 is not exercised by this test.
579; define double @v_pow_f64(double %x, double %y) {
580;   %pow = call double @llvm.pow.f64(double %x, double %y)
581;   ret double %pow
582; }
583
; Scalar f32 llvm.pow where the base %x goes through llvm.fabs first. On all
; checked targets the absolute value appears as a |v0| source modifier on
; v_log_f32 (printed in its _e64 form); no separate instruction is emitted
; for the fabs.
584define float @v_pow_f32_fabs_lhs(float %x, float %y) {
585; GFX6-LABEL: v_pow_f32_fabs_lhs:
586; GFX6:       ; %bb.0:
587; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
588; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
589; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
590; GFX6-NEXT:    v_exp_f32_e32 v0, v0
591; GFX6-NEXT:    s_setpc_b64 s[30:31]
592;
593; GFX8-LABEL: v_pow_f32_fabs_lhs:
594; GFX8:       ; %bb.0:
595; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
596; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
597; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
598; GFX8-NEXT:    v_exp_f32_e32 v0, v0
599; GFX8-NEXT:    s_setpc_b64 s[30:31]
600;
601; GFX9-LABEL: v_pow_f32_fabs_lhs:
602; GFX9:       ; %bb.0:
603; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
604; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
605; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
606; GFX9-NEXT:    v_exp_f32_e32 v0, v0
607; GFX9-NEXT:    s_setpc_b64 s[30:31]
608;
609; GFX90A-LABEL: v_pow_f32_fabs_lhs:
610; GFX90A:       ; %bb.0:
611; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
612; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
613; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
614; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
615; GFX90A-NEXT:    s_setpc_b64 s[30:31]
616;
617; GFX10-LABEL: v_pow_f32_fabs_lhs:
618; GFX10:       ; %bb.0:
619; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
621; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
622; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
623; GFX10-NEXT:    v_exp_f32_e32 v0, v0
624; GFX10-NEXT:    s_setpc_b64 s[30:31]
625  %fabs.x = call float @llvm.fabs.f32(float %x)
626  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
627  ret float %pow
628}
629
; Scalar f32 llvm.pow where the exponent %y goes through llvm.fabs first. The
; absolute value appears as a |v1| source modifier on v_mul_legacy_f32
; (printed as _e64 everywhere except gfx90a, which prints no suffix); the
; v_log_f32 of %x is unmodified.
630define float @v_pow_f32_fabs_rhs(float %x, float %y) {
631; GFX6-LABEL: v_pow_f32_fabs_rhs:
632; GFX6:       ; %bb.0:
633; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
634; GFX6-NEXT:    v_log_f32_e32 v0, v0
635; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
636; GFX6-NEXT:    v_exp_f32_e32 v0, v0
637; GFX6-NEXT:    s_setpc_b64 s[30:31]
638;
639; GFX8-LABEL: v_pow_f32_fabs_rhs:
640; GFX8:       ; %bb.0:
641; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
642; GFX8-NEXT:    v_log_f32_e32 v0, v0
643; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
644; GFX8-NEXT:    v_exp_f32_e32 v0, v0
645; GFX8-NEXT:    s_setpc_b64 s[30:31]
646;
647; GFX9-LABEL: v_pow_f32_fabs_rhs:
648; GFX9:       ; %bb.0:
649; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
650; GFX9-NEXT:    v_log_f32_e32 v0, v0
651; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
652; GFX9-NEXT:    v_exp_f32_e32 v0, v0
653; GFX9-NEXT:    s_setpc_b64 s[30:31]
654;
655; GFX90A-LABEL: v_pow_f32_fabs_rhs:
656; GFX90A:       ; %bb.0:
657; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
658; GFX90A-NEXT:    v_log_f32_e32 v0, v0
659; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
660; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
661; GFX90A-NEXT:    s_setpc_b64 s[30:31]
662;
663; GFX10-LABEL: v_pow_f32_fabs_rhs:
664; GFX10:       ; %bb.0:
665; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
666; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
667; GFX10-NEXT:    v_log_f32_e32 v0, v0
668; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
669; GFX10-NEXT:    v_exp_f32_e32 v0, v0
670; GFX10-NEXT:    s_setpc_b64 s[30:31]
671  %fabs.y = call float @llvm.fabs.f32(float %y)
672  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
673  ret float %pow
674}
675
; Scalar f32 llvm.pow where both %x and %y go through llvm.fabs first. The
; expected output carries both modifiers at once: |v0| on v_log_f32 and |v1|
; on v_mul_legacy_f32, with no standalone instructions for either fabs.
676define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
677; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
678; GFX6:       ; %bb.0:
679; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
680; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
681; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
682; GFX6-NEXT:    v_exp_f32_e32 v0, v0
683; GFX6-NEXT:    s_setpc_b64 s[30:31]
684;
685; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
686; GFX8:       ; %bb.0:
687; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
688; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
689; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
690; GFX8-NEXT:    v_exp_f32_e32 v0, v0
691; GFX8-NEXT:    s_setpc_b64 s[30:31]
692;
693; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
694; GFX9:       ; %bb.0:
695; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
697; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
698; GFX9-NEXT:    v_exp_f32_e32 v0, v0
699; GFX9-NEXT:    s_setpc_b64 s[30:31]
700;
701; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
702; GFX90A:       ; %bb.0:
703; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
704; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
705; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
706; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
707; GFX90A-NEXT:    s_setpc_b64 s[30:31]
708;
709; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
710; GFX10:       ; %bb.0:
711; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
712; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
713; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
714; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
715; GFX10-NEXT:    v_exp_f32_e32 v0, v0
716; GFX10-NEXT:    s_setpc_b64 s[30:31]
717  %fabs.x = call float @llvm.fabs.f32(float %x)
718  %fabs.y = call float @llvm.fabs.f32(float %y)
719  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
720  ret float %pow
721}
722
; amdgpu_ps variant where the base %x is marked inreg and %y is not. In the
; expected output %x is read straight from s0 by v_log_f32 (writing v1) and
; %y is in v0. Unlike the default-calling-convention tests in this file, the
; checked body has no entry s_waitcnt and ends with the "return to shader
; part epilog" marker instead of s_setpc_b64.
723define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
724; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
725; GFX6:       ; %bb.0:
726; GFX6-NEXT:    v_log_f32_e32 v1, s0
727; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
728; GFX6-NEXT:    v_exp_f32_e32 v0, v0
729; GFX6-NEXT:    ; return to shader part epilog
730;
731; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
732; GFX8:       ; %bb.0:
733; GFX8-NEXT:    v_log_f32_e32 v1, s0
734; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
735; GFX8-NEXT:    v_exp_f32_e32 v0, v0
736; GFX8-NEXT:    ; return to shader part epilog
737;
738; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
739; GFX9:       ; %bb.0:
740; GFX9-NEXT:    v_log_f32_e32 v1, s0
741; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
742; GFX9-NEXT:    v_exp_f32_e32 v0, v0
743; GFX9-NEXT:    ; return to shader part epilog
744;
745; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
746; GFX90A:       ; %bb.0:
747; GFX90A-NEXT:    v_log_f32_e32 v1, s0
748; GFX90A-NEXT:    v_mul_legacy_f32 v0, v0, v1
749; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
750; GFX90A-NEXT:    ; return to shader part epilog
751;
752; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
753; GFX10:       ; %bb.0:
754; GFX10-NEXT:    v_log_f32_e32 v1, s0
755; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
756; GFX10-NEXT:    v_exp_f32_e32 v0, v0
757; GFX10-NEXT:    ; return to shader part epilog
758  %pow = call float @llvm.pow.f32(float %x, float %y)
759  ret float %pow
760}
761
; amdgpu_ps variant where the exponent %y is marked inreg and %x is not. In
; the expected output %x is in v0 and %y is used directly from s0 as the
; first source of v_mul_legacy_f32; no copy of s0 into a VGPR is emitted.
762define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
763; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
764; GFX6:       ; %bb.0:
765; GFX6-NEXT:    v_log_f32_e32 v0, v0
766; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
767; GFX6-NEXT:    v_exp_f32_e32 v0, v0
768; GFX6-NEXT:    ; return to shader part epilog
769;
770; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
771; GFX8:       ; %bb.0:
772; GFX8-NEXT:    v_log_f32_e32 v0, v0
773; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
774; GFX8-NEXT:    v_exp_f32_e32 v0, v0
775; GFX8-NEXT:    ; return to shader part epilog
776;
777; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
778; GFX9:       ; %bb.0:
779; GFX9-NEXT:    v_log_f32_e32 v0, v0
780; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
781; GFX9-NEXT:    v_exp_f32_e32 v0, v0
782; GFX9-NEXT:    ; return to shader part epilog
783;
784; GFX90A-LABEL: v_pow_f32_vgpr_sgpr:
785; GFX90A:       ; %bb.0:
786; GFX90A-NEXT:    v_log_f32_e32 v0, v0
787; GFX90A-NEXT:    v_mul_legacy_f32 v0, s0, v0
788; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
789; GFX90A-NEXT:    ; return to shader part epilog
790;
791; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
792; GFX10:       ; %bb.0:
793; GFX10-NEXT:    v_log_f32_e32 v0, v0
794; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
795; GFX10-NEXT:    v_exp_f32_e32 v0, v0
796; GFX10-NEXT:    ; return to shader part epilog
797  %pow = call float @llvm.pow.f32(float %x, float %y)
798  ret float %pow
799}
800
; amdgpu_ps variant where both %x and %y are marked inreg. In the expected
; output v_log_f32 reads %x from s0 and v_mul_legacy_f32 reads %y from s1;
; each instruction has only one scalar-register source, and the result is
; produced in v0.
801define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
802; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
803; GFX6:       ; %bb.0:
804; GFX6-NEXT:    v_log_f32_e32 v0, s0
805; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
806; GFX6-NEXT:    v_exp_f32_e32 v0, v0
807; GFX6-NEXT:    ; return to shader part epilog
808;
809; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
810; GFX8:       ; %bb.0:
811; GFX8-NEXT:    v_log_f32_e32 v0, s0
812; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
813; GFX8-NEXT:    v_exp_f32_e32 v0, v0
814; GFX8-NEXT:    ; return to shader part epilog
815;
816; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
817; GFX9:       ; %bb.0:
818; GFX9-NEXT:    v_log_f32_e32 v0, s0
819; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
820; GFX9-NEXT:    v_exp_f32_e32 v0, v0
821; GFX9-NEXT:    ; return to shader part epilog
822;
823; GFX90A-LABEL: v_pow_f32_sgpr_sgpr:
824; GFX90A:       ; %bb.0:
825; GFX90A-NEXT:    v_log_f32_e32 v0, s0
826; GFX90A-NEXT:    v_mul_legacy_f32 v0, s1, v0
827; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
828; GFX90A-NEXT:    ; return to shader part epilog
829;
830; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
831; GFX10:       ; %bb.0:
832; GFX10-NEXT:    v_log_f32_e32 v0, s0
833; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
834; GFX10-NEXT:    v_exp_f32_e32 v0, v0
835; GFX10-NEXT:    ; return to shader part epilog
836  %pow = call float @llvm.pow.f32(float %x, float %y)
837  ret float %pow
838}
839
; Intrinsic declarations for the tests in this file, grouped as scalar pow,
; scalar fabs, and vector pow.
; llvm.pow.f64 is referenced only by the commented-out v_pow_f64 test, and
; llvm.fabs.f16 is not called by any function in the visible part of this file.
840declare half @llvm.pow.f16(half, half)
841declare float @llvm.pow.f32(float, float)
842declare double @llvm.pow.f64(double, double)
843
844declare half @llvm.fabs.f16(half)
845declare float @llvm.fabs.f32(float)
846
847declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
848declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)
849