1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s 3; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s 4; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s 5; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s 6; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s 7; RUN: llc -march=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s 8 9define float @v_pow_f32(float %x, float %y) { 10; GFX6-LABEL: v_pow_f32: 11; GFX6: ; %bb.0: 12; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13; GFX6-NEXT: v_log_f32_e32 v0, v0 14; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 15; GFX6-NEXT: v_exp_f32_e32 v0, v0 16; GFX6-NEXT: s_setpc_b64 s[30:31] 17; 18; GFX8-LABEL: v_pow_f32: 19; GFX8: ; %bb.0: 20; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 21; GFX8-NEXT: v_log_f32_e32 v0, v0 22; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 23; GFX8-NEXT: v_exp_f32_e32 v0, v0 24; GFX8-NEXT: s_setpc_b64 s[30:31] 25; 26; GFX9-LABEL: v_pow_f32: 27; GFX9: ; %bb.0: 28; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 29; GFX9-NEXT: v_log_f32_e32 v0, v0 30; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 31; GFX9-NEXT: v_exp_f32_e32 v0, v0 32; GFX9-NEXT: s_setpc_b64 s[30:31] 33; 34; GFX90A-LABEL: v_pow_f32: 35; GFX90A: ; %bb.0: 36; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 37; GFX90A-NEXT: v_log_f32_e32 v0, v0 38; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 39; GFX90A-NEXT: v_exp_f32_e32 v0, v0 40; GFX90A-NEXT: s_setpc_b64 s[30:31] 41; 42; GFX10-LABEL: v_pow_f32: 43; GFX10: ; %bb.0: 44; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 45; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 46; GFX10-NEXT: v_log_f32_e32 v0, v0 47; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 48; GFX10-NEXT: v_exp_f32_e32 v0, v0 49; GFX10-NEXT: s_setpc_b64 s[30:31] 50; 51; GFX11-LABEL: v_pow_f32: 52; GFX11: ; 
%bb.0: 53; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 54; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 55; GFX11-NEXT: v_log_f32_e32 v0, v0 56; GFX11-NEXT: s_waitcnt_depctr 0xfff 57; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 58; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 59; GFX11-NEXT: v_exp_f32_e32 v0, v0 60; GFX11-NEXT: s_setpc_b64 s[30:31] 61 %pow = call float @llvm.pow.f32(float %x, float %y) 62 ret float %pow 63} 64 65define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) { 66; GFX6-LABEL: v_pow_v2f32: 67; GFX6: ; %bb.0: 68; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 69; GFX6-NEXT: v_log_f32_e32 v0, v0 70; GFX6-NEXT: v_log_f32_e32 v1, v1 71; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 72; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1 73; GFX6-NEXT: v_exp_f32_e32 v0, v0 74; GFX6-NEXT: v_exp_f32_e32 v1, v1 75; GFX6-NEXT: s_setpc_b64 s[30:31] 76; 77; GFX8-LABEL: v_pow_v2f32: 78; GFX8: ; %bb.0: 79; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 80; GFX8-NEXT: v_log_f32_e32 v0, v0 81; GFX8-NEXT: v_log_f32_e32 v1, v1 82; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 83; GFX8-NEXT: v_mul_legacy_f32_e32 v1, v3, v1 84; GFX8-NEXT: v_exp_f32_e32 v0, v0 85; GFX8-NEXT: v_exp_f32_e32 v1, v1 86; GFX8-NEXT: s_setpc_b64 s[30:31] 87; 88; GFX9-LABEL: v_pow_v2f32: 89; GFX9: ; %bb.0: 90; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 91; GFX9-NEXT: v_log_f32_e32 v0, v0 92; GFX9-NEXT: v_log_f32_e32 v1, v1 93; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 94; GFX9-NEXT: v_mul_legacy_f32_e32 v1, v3, v1 95; GFX9-NEXT: v_exp_f32_e32 v0, v0 96; GFX9-NEXT: v_exp_f32_e32 v1, v1 97; GFX9-NEXT: s_setpc_b64 s[30:31] 98; 99; GFX90A-LABEL: v_pow_v2f32: 100; GFX90A: ; %bb.0: 101; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 102; GFX90A-NEXT: v_log_f32_e32 v0, v0 103; GFX90A-NEXT: v_log_f32_e32 v1, v1 104; GFX90A-NEXT: v_mul_legacy_f32 v0, v2, v0 105; GFX90A-NEXT: v_mul_legacy_f32 v1, v3, v1 106; GFX90A-NEXT: v_exp_f32_e32 v0, v0 107; GFX90A-NEXT: v_exp_f32_e32 v1, 
v1 108; GFX90A-NEXT: s_setpc_b64 s[30:31] 109; 110; GFX10-LABEL: v_pow_v2f32: 111; GFX10: ; %bb.0: 112; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 113; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 114; GFX10-NEXT: v_log_f32_e32 v0, v0 115; GFX10-NEXT: v_log_f32_e32 v1, v1 116; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 117; GFX10-NEXT: v_mul_legacy_f32_e32 v1, v3, v1 118; GFX10-NEXT: v_exp_f32_e32 v0, v0 119; GFX10-NEXT: v_exp_f32_e32 v1, v1 120; GFX10-NEXT: s_setpc_b64 s[30:31] 121; 122; GFX11-LABEL: v_pow_v2f32: 123; GFX11: ; %bb.0: 124; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 125; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 126; GFX11-NEXT: v_log_f32_e32 v0, v0 127; GFX11-NEXT: v_log_f32_e32 v1, v1 128; GFX11-NEXT: s_waitcnt_depctr 0xfff 129; GFX11-NEXT: v_dual_mul_dx9_zero_f32 v0, v2, v0 :: v_dual_mul_dx9_zero_f32 v1, v3, v1 130; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 131; GFX11-NEXT: v_exp_f32_e32 v0, v0 132; GFX11-NEXT: v_exp_f32_e32 v1, v1 133; GFX11-NEXT: s_setpc_b64 s[30:31] 134 %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y) 135 ret <2 x float> %pow 136} 137 138define half @v_pow_f16(half %x, half %y) { 139; GFX6-LABEL: v_pow_f16: 140; GFX6: ; %bb.0: 141; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 142; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 143; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 144; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 145; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 146; GFX6-NEXT: v_log_f32_e32 v0, v0 147; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 148; GFX6-NEXT: v_exp_f32_e32 v0, v0 149; GFX6-NEXT: s_setpc_b64 s[30:31] 150; 151; GFX8-LABEL: v_pow_f16: 152; GFX8: ; %bb.0: 153; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 154; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 155; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 156; GFX8-NEXT: v_log_f32_e32 v0, v0 157; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 158; GFX8-NEXT: v_exp_f32_e32 v0, v0 159; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 160; 
GFX8-NEXT: s_setpc_b64 s[30:31] 161; 162; GFX9-LABEL: v_pow_f16: 163; GFX9: ; %bb.0: 164; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 165; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 166; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 167; GFX9-NEXT: v_log_f32_e32 v0, v0 168; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 169; GFX9-NEXT: v_exp_f32_e32 v0, v0 170; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 171; GFX9-NEXT: s_setpc_b64 s[30:31] 172; 173; GFX90A-LABEL: v_pow_f16: 174; GFX90A: ; %bb.0: 175; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 176; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0 177; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1 178; GFX90A-NEXT: v_log_f32_e32 v0, v0 179; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 180; GFX90A-NEXT: v_exp_f32_e32 v0, v0 181; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 182; GFX90A-NEXT: s_setpc_b64 s[30:31] 183; 184; GFX10-LABEL: v_pow_f16: 185; GFX10: ; %bb.0: 186; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 187; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 188; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 189; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1 190; GFX10-NEXT: v_log_f32_e32 v0, v0 191; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 192; GFX10-NEXT: v_exp_f32_e32 v0, v0 193; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 194; GFX10-NEXT: s_setpc_b64 s[30:31] 195; 196; GFX11-LABEL: v_pow_f16: 197; GFX11: ; %bb.0: 198; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 199; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 200; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 201; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 202; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1) 203; GFX11-NEXT: v_log_f32_e32 v0, v0 204; GFX11-NEXT: s_waitcnt_depctr 0xfff 205; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 206; GFX11-NEXT: v_exp_f32_e32 v0, v0 207; GFX11-NEXT: s_waitcnt_depctr 0xfff 208; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 209; GFX11-NEXT: s_setpc_b64 s[30:31] 210 %pow = call half @llvm.pow.f16(half %x, half %y) 211 ret half %pow 212} 213 214define <2 x half> 
@v_pow_v2f16(<2 x half> %x, <2 x half> %y) { 215; GFX6-LABEL: v_pow_v2f16: 216; GFX6: ; %bb.0: 217; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 218; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 219; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 220; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 221; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 222; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 223; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 224; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 225; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 226; GFX6-NEXT: v_log_f32_e32 v0, v0 227; GFX6-NEXT: v_log_f32_e32 v1, v1 228; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 229; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1 230; GFX6-NEXT: v_exp_f32_e32 v0, v0 231; GFX6-NEXT: v_exp_f32_e32 v1, v1 232; GFX6-NEXT: s_setpc_b64 s[30:31] 233; 234; GFX8-LABEL: v_pow_v2f16: 235; GFX8: ; %bb.0: 236; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 237; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 238; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 239; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 240; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 241; GFX8-NEXT: v_log_f32_e32 v2, v2 242; GFX8-NEXT: v_log_f32_e32 v0, v0 243; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 244; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 245; GFX8-NEXT: v_exp_f32_e32 v1, v2 246; GFX8-NEXT: v_exp_f32_e32 v0, v0 247; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 248; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 249; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 250; GFX8-NEXT: s_setpc_b64 s[30:31] 251; 252; GFX9-LABEL: v_pow_v2f16: 253; GFX9: ; %bb.0: 254; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 255; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 256; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 257; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 258; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 259; GFX9-NEXT: 
v_log_f32_e32 v2, v2 260; GFX9-NEXT: v_log_f32_e32 v0, v0 261; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 262; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 263; GFX9-NEXT: v_exp_f32_e32 v1, v2 264; GFX9-NEXT: v_exp_f32_e32 v0, v0 265; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 266; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 267; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 268; GFX9-NEXT: s_setpc_b64 s[30:31] 269; 270; GFX90A-LABEL: v_pow_v2f16: 271; GFX90A: ; %bb.0: 272; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 273; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 274; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0 275; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 276; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1 277; GFX90A-NEXT: v_log_f32_e32 v2, v2 278; GFX90A-NEXT: v_log_f32_e32 v0, v0 279; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 280; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 281; GFX90A-NEXT: v_exp_f32_e32 v1, v2 282; GFX90A-NEXT: v_exp_f32_e32 v0, v0 283; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 284; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 285; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 286; GFX90A-NEXT: s_setpc_b64 s[30:31] 287; 288; GFX10-LABEL: v_pow_v2f16: 289; GFX10: ; %bb.0: 290; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 291; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 292; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 293; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 294; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 295; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1 296; GFX10-NEXT: v_log_f32_e32 v2, v2 297; GFX10-NEXT: v_log_f32_e32 v0, v0 298; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 299; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 300; GFX10-NEXT: v_exp_f32_e32 v1, v2 301; GFX10-NEXT: v_exp_f32_e32 v0, v0 302; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1 303; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 304; GFX10-NEXT: v_pack_b32_f16 
v0, v0, v1 305; GFX10-NEXT: s_setpc_b64 s[30:31] 306; 307; GFX11-LABEL: v_pow_v2f16: 308; GFX11: ; %bb.0: 309; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 310; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 311; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 312; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 313; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 314; GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 315; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 316; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2 317; GFX11-NEXT: v_log_f32_e32 v0, v0 318; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 319; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3 320; GFX11-NEXT: v_log_f32_e32 v2, v2 321; GFX11-NEXT: s_waitcnt_depctr 0xfff 322; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 323; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2 324; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 325; GFX11-NEXT: v_exp_f32_e32 v0, v0 326; GFX11-NEXT: v_exp_f32_e32 v1, v2 327; GFX11-NEXT: s_waitcnt_depctr 0xfff 328; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 329; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1 330; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 331; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 332; GFX11-NEXT: s_setpc_b64 s[30:31] 333 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y) 334 ret <2 x half> %pow 335} 336 337define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) { 338; GFX6-LABEL: v_pow_v2f16_fneg_lhs: 339; GFX6: ; %bb.0: 340; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 341; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 342; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 343; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 344; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 345; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 346; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 347; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v3 348; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 349; GFX6-NEXT: v_lshrrev_b32_e32 v0, 16, v0 350; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 
351; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 352; GFX6-NEXT: v_log_f32_e32 v3, v3 353; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 354; GFX6-NEXT: v_log_f32_e32 v4, v0 355; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v3 356; GFX6-NEXT: v_exp_f32_e32 v0, v0 357; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v1, v4 358; GFX6-NEXT: v_exp_f32_e32 v1, v1 359; GFX6-NEXT: s_setpc_b64 s[30:31] 360; 361; GFX8-LABEL: v_pow_v2f16_fneg_lhs: 362; GFX8: ; %bb.0: 363; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 364; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 365; GFX8-NEXT: v_cvt_f32_f16_e64 v0, -v0 366; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 367; GFX8-NEXT: v_cvt_f32_f16_e32 v1, v1 368; GFX8-NEXT: v_log_f32_e32 v2, v2 369; GFX8-NEXT: v_log_f32_e32 v0, v0 370; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 371; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 372; GFX8-NEXT: v_exp_f32_e32 v1, v2 373; GFX8-NEXT: v_exp_f32_e32 v0, v0 374; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 375; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 376; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 377; GFX8-NEXT: s_setpc_b64 s[30:31] 378; 379; GFX9-LABEL: v_pow_v2f16_fneg_lhs: 380; GFX9: ; %bb.0: 381; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 382; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 383; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -v0 384; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 385; GFX9-NEXT: v_cvt_f32_f16_e32 v1, v1 386; GFX9-NEXT: v_log_f32_e32 v2, v2 387; GFX9-NEXT: v_log_f32_e32 v0, v0 388; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 389; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 390; GFX9-NEXT: v_exp_f32_e32 v1, v2 391; GFX9-NEXT: v_exp_f32_e32 v0, v0 392; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 393; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 394; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 395; GFX9-NEXT: s_setpc_b64 
s[30:31] 396; 397; GFX90A-LABEL: v_pow_v2f16_fneg_lhs: 398; GFX90A: ; %bb.0: 399; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 400; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 401; GFX90A-NEXT: v_cvt_f32_f16_e64 v0, -v0 402; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 403; GFX90A-NEXT: v_cvt_f32_f16_e32 v1, v1 404; GFX90A-NEXT: v_log_f32_e32 v2, v2 405; GFX90A-NEXT: v_log_f32_e32 v0, v0 406; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 407; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 408; GFX90A-NEXT: v_exp_f32_e32 v1, v2 409; GFX90A-NEXT: v_exp_f32_e32 v0, v0 410; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 411; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 412; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 413; GFX90A-NEXT: s_setpc_b64 s[30:31] 414; 415; GFX10-LABEL: v_pow_v2f16_fneg_lhs: 416; GFX10: ; %bb.0: 417; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 418; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 419; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 420; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0 421; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 422; GFX10-NEXT: v_cvt_f32_f16_e32 v1, v1 423; GFX10-NEXT: v_log_f32_e32 v2, v2 424; GFX10-NEXT: v_log_f32_e32 v0, v0 425; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 426; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 427; GFX10-NEXT: v_exp_f32_e32 v1, v2 428; GFX10-NEXT: v_exp_f32_e32 v0, v0 429; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1 430; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 431; GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 432; GFX10-NEXT: s_setpc_b64 s[30:31] 433; 434; GFX11-LABEL: v_pow_v2f16_fneg_lhs: 435; GFX11: ; %bb.0: 436; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 437; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 438; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 439; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0 440; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 441; 
GFX11-NEXT: v_cvt_f32_f16_e32 v1, v1 442; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 443; GFX11-NEXT: v_cvt_f32_f16_e64 v2, -v2 444; GFX11-NEXT: v_log_f32_e32 v0, v0 445; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 446; GFX11-NEXT: v_cvt_f32_f16_e32 v3, v3 447; GFX11-NEXT: v_log_f32_e32 v2, v2 448; GFX11-NEXT: s_waitcnt_depctr 0xfff 449; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 450; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2 451; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 452; GFX11-NEXT: v_exp_f32_e32 v0, v0 453; GFX11-NEXT: v_exp_f32_e32 v1, v2 454; GFX11-NEXT: s_waitcnt_depctr 0xfff 455; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 456; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1 457; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 458; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 459; GFX11-NEXT: s_setpc_b64 s[30:31] 460 %x.fneg = fneg <2 x half> %x 461 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y) 462 ret <2 x half> %pow 463} 464 465define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) { 466; GFX6-LABEL: v_pow_v2f16_fneg_rhs: 467; GFX6: ; %bb.0: 468; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 469; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 470; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 471; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 472; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 473; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 474; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 475; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 476; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 477; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 478; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 479; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 480; GFX6-NEXT: v_log_f32_e32 v0, v0 481; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 482; GFX6-NEXT: v_log_f32_e32 v1, v1 483; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 484; GFX6-NEXT: v_exp_f32_e32 v0, v0 485; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1 486; GFX6-NEXT: 
v_exp_f32_e32 v1, v1 487; GFX6-NEXT: s_setpc_b64 s[30:31] 488; 489; GFX8-LABEL: v_pow_v2f16_fneg_rhs: 490; GFX8: ; %bb.0: 491; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 492; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 493; GFX8-NEXT: v_cvt_f32_f16_e32 v0, v0 494; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 495; GFX8-NEXT: v_cvt_f32_f16_e64 v1, -v1 496; GFX8-NEXT: v_log_f32_e32 v2, v2 497; GFX8-NEXT: v_log_f32_e32 v0, v0 498; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 499; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 500; GFX8-NEXT: v_exp_f32_e32 v1, v2 501; GFX8-NEXT: v_exp_f32_e32 v0, v0 502; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 503; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 504; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 505; GFX8-NEXT: s_setpc_b64 s[30:31] 506; 507; GFX9-LABEL: v_pow_v2f16_fneg_rhs: 508; GFX9: ; %bb.0: 509; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 510; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 511; GFX9-NEXT: v_cvt_f32_f16_e32 v0, v0 512; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 513; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -v1 514; GFX9-NEXT: v_log_f32_e32 v2, v2 515; GFX9-NEXT: v_log_f32_e32 v0, v0 516; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 517; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 518; GFX9-NEXT: v_exp_f32_e32 v1, v2 519; GFX9-NEXT: v_exp_f32_e32 v0, v0 520; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 521; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 522; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 523; GFX9-NEXT: s_setpc_b64 s[30:31] 524; 525; GFX90A-LABEL: v_pow_v2f16_fneg_rhs: 526; GFX90A: ; %bb.0: 527; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 528; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 529; GFX90A-NEXT: v_cvt_f32_f16_e32 v0, v0 530; GFX90A-NEXT: 
v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 531; GFX90A-NEXT: v_cvt_f32_f16_e64 v1, -v1 532; GFX90A-NEXT: v_log_f32_e32 v2, v2 533; GFX90A-NEXT: v_log_f32_e32 v0, v0 534; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 535; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 536; GFX90A-NEXT: v_exp_f32_e32 v1, v2 537; GFX90A-NEXT: v_exp_f32_e32 v0, v0 538; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 539; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 540; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 541; GFX90A-NEXT: s_setpc_b64 s[30:31] 542; 543; GFX10-LABEL: v_pow_v2f16_fneg_rhs: 544; GFX10: ; %bb.0: 545; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 546; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 547; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 548; GFX10-NEXT: v_cvt_f32_f16_e32 v0, v0 549; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 550; GFX10-NEXT: v_cvt_f32_f16_e64 v1, -v1 551; GFX10-NEXT: v_log_f32_e32 v2, v2 552; GFX10-NEXT: v_log_f32_e32 v0, v0 553; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 554; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 555; GFX10-NEXT: v_exp_f32_e32 v1, v2 556; GFX10-NEXT: v_exp_f32_e32 v0, v0 557; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1 558; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 559; GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 560; GFX10-NEXT: s_setpc_b64 s[30:31] 561; 562; GFX11-LABEL: v_pow_v2f16_fneg_rhs: 563; GFX11: ; %bb.0: 564; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 565; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 566; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 567; GFX11-NEXT: v_cvt_f32_f16_e32 v0, v0 568; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 569; GFX11-NEXT: v_cvt_f32_f16_e64 v1, -v1 570; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 571; GFX11-NEXT: v_cvt_f32_f16_e32 v2, v2 572; GFX11-NEXT: v_log_f32_e32 v0, v0 573; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 
574; GFX11-NEXT: v_cvt_f32_f16_e64 v3, -v3 575; GFX11-NEXT: v_log_f32_e32 v2, v2 576; GFX11-NEXT: s_waitcnt_depctr 0xfff 577; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 578; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2 579; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 580; GFX11-NEXT: v_exp_f32_e32 v0, v0 581; GFX11-NEXT: v_exp_f32_e32 v1, v2 582; GFX11-NEXT: s_waitcnt_depctr 0xfff 583; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 584; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1 585; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 586; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 587; GFX11-NEXT: s_setpc_b64 s[30:31] 588 %y.fneg = fneg <2 x half> %y 589 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg) 590 ret <2 x half> %pow 591} 592 593define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) { 594; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs: 595; GFX6: ; %bb.0: 596; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 597; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 598; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 599; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 600; GFX6-NEXT: v_cvt_f16_f32_e32 v2, v2 601; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 602; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 603; GFX6-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 604; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 605; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 606; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 607; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 608; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 609; GFX6-NEXT: v_xor_b32_e32 v2, 0x80008000, v2 610; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 611; GFX6-NEXT: v_cvt_f32_f16_e32 v2, v2 612; GFX6-NEXT: v_log_f32_e32 v0, v0 613; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v3 614; GFX6-NEXT: v_log_f32_e32 v1, v1 615; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v2, v0 616; GFX6-NEXT: v_exp_f32_e32 v0, v0 617; GFX6-NEXT: v_mul_legacy_f32_e32 v1, v3, v1 618; GFX6-NEXT: v_exp_f32_e32 v1, v1 619; GFX6-NEXT: s_setpc_b64 s[30:31] 620; 621; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs: 
622; GFX8: ; %bb.0: 623; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 624; GFX8-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 625; GFX8-NEXT: v_cvt_f32_f16_e64 v0, -v0 626; GFX8-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 627; GFX8-NEXT: v_cvt_f32_f16_e64 v1, -v1 628; GFX8-NEXT: v_log_f32_e32 v2, v2 629; GFX8-NEXT: v_log_f32_e32 v0, v0 630; GFX8-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 631; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 632; GFX8-NEXT: v_exp_f32_e32 v1, v2 633; GFX8-NEXT: v_exp_f32_e32 v0, v0 634; GFX8-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 635; GFX8-NEXT: v_cvt_f16_f32_e32 v0, v0 636; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 637; GFX8-NEXT: s_setpc_b64 s[30:31] 638; 639; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs: 640; GFX9: ; %bb.0: 641; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 642; GFX9-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 643; GFX9-NEXT: v_cvt_f32_f16_e64 v0, -v0 644; GFX9-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 645; GFX9-NEXT: v_cvt_f32_f16_e64 v1, -v1 646; GFX9-NEXT: v_log_f32_e32 v2, v2 647; GFX9-NEXT: v_log_f32_e32 v0, v0 648; GFX9-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 649; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 650; GFX9-NEXT: v_exp_f32_e32 v1, v2 651; GFX9-NEXT: v_exp_f32_e32 v0, v0 652; GFX9-NEXT: v_cvt_f16_f32_e32 v1, v1 653; GFX9-NEXT: v_cvt_f16_f32_e32 v0, v0 654; GFX9-NEXT: v_pack_b32_f16 v0, v0, v1 655; GFX9-NEXT: s_setpc_b64 s[30:31] 656; 657; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs: 658; GFX90A: ; %bb.0: 659; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 660; GFX90A-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 661; GFX90A-NEXT: v_cvt_f32_f16_e64 v0, -v0 662; GFX90A-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 663; GFX90A-NEXT: 
v_cvt_f32_f16_e64 v1, -v1 664; GFX90A-NEXT: v_log_f32_e32 v2, v2 665; GFX90A-NEXT: v_log_f32_e32 v0, v0 666; GFX90A-NEXT: v_mul_legacy_f32 v2, v3, v2 667; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 668; GFX90A-NEXT: v_exp_f32_e32 v1, v2 669; GFX90A-NEXT: v_exp_f32_e32 v0, v0 670; GFX90A-NEXT: v_cvt_f16_f32_e32 v1, v1 671; GFX90A-NEXT: v_cvt_f16_f32_e32 v0, v0 672; GFX90A-NEXT: v_pack_b32_f16 v0, v0, v1 673; GFX90A-NEXT: s_setpc_b64 s[30:31] 674; 675; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs: 676; GFX10: ; %bb.0: 677; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 678; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 679; GFX10-NEXT: v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 680; GFX10-NEXT: v_cvt_f32_f16_e64 v0, -v0 681; GFX10-NEXT: v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 682; GFX10-NEXT: v_cvt_f32_f16_e64 v1, -v1 683; GFX10-NEXT: v_log_f32_e32 v2, v2 684; GFX10-NEXT: v_log_f32_e32 v0, v0 685; GFX10-NEXT: v_mul_legacy_f32_e32 v2, v3, v2 686; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 687; GFX10-NEXT: v_exp_f32_e32 v1, v2 688; GFX10-NEXT: v_exp_f32_e32 v0, v0 689; GFX10-NEXT: v_cvt_f16_f32_e32 v1, v1 690; GFX10-NEXT: v_cvt_f16_f32_e32 v0, v0 691; GFX10-NEXT: v_pack_b32_f16 v0, v0, v1 692; GFX10-NEXT: s_setpc_b64 s[30:31] 693; 694; GFX11-LABEL: v_pow_v2f16_fneg_lhs_rhs: 695; GFX11: ; %bb.0: 696; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 697; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 698; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v0 699; GFX11-NEXT: v_cvt_f32_f16_e64 v0, -v0 700; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v1 701; GFX11-NEXT: v_cvt_f32_f16_e64 v1, -v1 702; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 703; GFX11-NEXT: v_cvt_f32_f16_e64 v2, -v2 704; GFX11-NEXT: v_log_f32_e32 v0, v0 705; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 706; GFX11-NEXT: v_cvt_f32_f16_e64 v3, -v3 707; GFX11-NEXT: v_log_f32_e32 v2, v2 708; 
GFX11-NEXT: s_waitcnt_depctr 0xfff 709; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 710; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v2, v3, v2 711; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 712; GFX11-NEXT: v_exp_f32_e32 v0, v0 713; GFX11-NEXT: v_exp_f32_e32 v1, v2 714; GFX11-NEXT: s_waitcnt_depctr 0xfff 715; GFX11-NEXT: v_cvt_f16_f32_e32 v0, v0 716; GFX11-NEXT: v_cvt_f16_f32_e32 v1, v1 717; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 718; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 719; GFX11-NEXT: s_setpc_b64 s[30:31] 720 %x.fneg = fneg <2 x half> %x 721 %y.fneg = fneg <2 x half> %y 722 %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg) 723 ret <2 x half> %pow 724} 725 726; FIXME: The f64 case below is commented out (reason not recorded here). Re-enable it once it can be tested. 727; define double @v_pow_f64(double %x, double %y) { 728; %pow = call double @llvm.pow.f64(double %x, double %y) 729; ret double %pow 730; } 731 732define float @v_pow_f32_fabs_lhs(float %x, float %y) { 733; GFX6-LABEL: v_pow_f32_fabs_lhs: 734; GFX6: ; %bb.0: 735; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 736; GFX6-NEXT: v_log_f32_e64 v0, |v0| 737; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 738; GFX6-NEXT: v_exp_f32_e32 v0, v0 739; GFX6-NEXT: s_setpc_b64 s[30:31] 740; 741; GFX8-LABEL: v_pow_f32_fabs_lhs: 742; GFX8: ; %bb.0: 743; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 744; GFX8-NEXT: v_log_f32_e64 v0, |v0| 745; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 746; GFX8-NEXT: v_exp_f32_e32 v0, v0 747; GFX8-NEXT: s_setpc_b64 s[30:31] 748; 749; GFX9-LABEL: v_pow_f32_fabs_lhs: 750; GFX9: ; %bb.0: 751; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 752; GFX9-NEXT: v_log_f32_e64 v0, |v0| 753; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 754; GFX9-NEXT: v_exp_f32_e32 v0, v0 755; GFX9-NEXT: s_setpc_b64 s[30:31] 756; 757; GFX90A-LABEL: v_pow_f32_fabs_lhs: 758; GFX90A: ; %bb.0: 759; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 760; GFX90A-NEXT: v_log_f32_e64 v0, |v0| 761; GFX90A-NEXT: v_mul_legacy_f32 v0, v1, v0 762; 
GFX90A-NEXT: v_exp_f32_e32 v0, v0 763; GFX90A-NEXT: s_setpc_b64 s[30:31] 764; 765; GFX10-LABEL: v_pow_f32_fabs_lhs: 766; GFX10: ; %bb.0: 767; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 768; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 769; GFX10-NEXT: v_log_f32_e64 v0, |v0| 770; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v1, v0 771; GFX10-NEXT: v_exp_f32_e32 v0, v0 772; GFX10-NEXT: s_setpc_b64 s[30:31] 773; 774; GFX11-LABEL: v_pow_f32_fabs_lhs: 775; GFX11: ; %bb.0: 776; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 777; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 778; GFX11-NEXT: v_log_f32_e64 v0, |v0| 779; GFX11-NEXT: s_waitcnt_depctr 0xfff 780; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v1, v0 781; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 782; GFX11-NEXT: v_exp_f32_e32 v0, v0 783; GFX11-NEXT: s_setpc_b64 s[30:31] 784 %fabs.x = call float @llvm.fabs.f32(float %x) 785 %pow = call float @llvm.pow.f32(float %fabs.x, float %y) 786 ret float %pow 787} 788 789define float @v_pow_f32_fabs_rhs(float %x, float %y) { 790; GFX6-LABEL: v_pow_f32_fabs_rhs: 791; GFX6: ; %bb.0: 792; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 793; GFX6-NEXT: v_log_f32_e32 v0, v0 794; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 795; GFX6-NEXT: v_exp_f32_e32 v0, v0 796; GFX6-NEXT: s_setpc_b64 s[30:31] 797; 798; GFX8-LABEL: v_pow_f32_fabs_rhs: 799; GFX8: ; %bb.0: 800; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 801; GFX8-NEXT: v_log_f32_e32 v0, v0 802; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 803; GFX8-NEXT: v_exp_f32_e32 v0, v0 804; GFX8-NEXT: s_setpc_b64 s[30:31] 805; 806; GFX9-LABEL: v_pow_f32_fabs_rhs: 807; GFX9: ; %bb.0: 808; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 809; GFX9-NEXT: v_log_f32_e32 v0, v0 810; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 811; GFX9-NEXT: v_exp_f32_e32 v0, v0 812; GFX9-NEXT: s_setpc_b64 s[30:31] 813; 814; GFX90A-LABEL: v_pow_f32_fabs_rhs: 815; GFX90A: ; %bb.0: 816; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 817; GFX90A-NEXT: 
v_log_f32_e32 v0, v0 818; GFX90A-NEXT: v_mul_legacy_f32 v0, |v1|, v0 819; GFX90A-NEXT: v_exp_f32_e32 v0, v0 820; GFX90A-NEXT: s_setpc_b64 s[30:31] 821; 822; GFX10-LABEL: v_pow_f32_fabs_rhs: 823; GFX10: ; %bb.0: 824; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 825; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 826; GFX10-NEXT: v_log_f32_e32 v0, v0 827; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 828; GFX10-NEXT: v_exp_f32_e32 v0, v0 829; GFX10-NEXT: s_setpc_b64 s[30:31] 830; 831; GFX11-LABEL: v_pow_f32_fabs_rhs: 832; GFX11: ; %bb.0: 833; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 834; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 835; GFX11-NEXT: v_log_f32_e32 v0, v0 836; GFX11-NEXT: s_waitcnt_depctr 0xfff 837; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0 838; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 839; GFX11-NEXT: v_exp_f32_e32 v0, v0 840; GFX11-NEXT: s_setpc_b64 s[30:31] 841 %fabs.y = call float @llvm.fabs.f32(float %y) 842 %pow = call float @llvm.pow.f32(float %x, float %fabs.y) 843 ret float %pow 844} 845 846define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) { 847; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs: 848; GFX6: ; %bb.0: 849; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 850; GFX6-NEXT: v_log_f32_e64 v0, |v0| 851; GFX6-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 852; GFX6-NEXT: v_exp_f32_e32 v0, v0 853; GFX6-NEXT: s_setpc_b64 s[30:31] 854; 855; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs: 856; GFX8: ; %bb.0: 857; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 858; GFX8-NEXT: v_log_f32_e64 v0, |v0| 859; GFX8-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 860; GFX8-NEXT: v_exp_f32_e32 v0, v0 861; GFX8-NEXT: s_setpc_b64 s[30:31] 862; 863; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs: 864; GFX9: ; %bb.0: 865; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 866; GFX9-NEXT: v_log_f32_e64 v0, |v0| 867; GFX9-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 868; GFX9-NEXT: v_exp_f32_e32 v0, v0 869; GFX9-NEXT: s_setpc_b64 s[30:31] 870; 871; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs: 872; 
GFX90A: ; %bb.0: 873; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 874; GFX90A-NEXT: v_log_f32_e64 v0, |v0| 875; GFX90A-NEXT: v_mul_legacy_f32 v0, |v1|, v0 876; GFX90A-NEXT: v_exp_f32_e32 v0, v0 877; GFX90A-NEXT: s_setpc_b64 s[30:31] 878; 879; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs: 880; GFX10: ; %bb.0: 881; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 882; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 883; GFX10-NEXT: v_log_f32_e64 v0, |v0| 884; GFX10-NEXT: v_mul_legacy_f32_e64 v0, |v1|, v0 885; GFX10-NEXT: v_exp_f32_e32 v0, v0 886; GFX10-NEXT: s_setpc_b64 s[30:31] 887; 888; GFX11-LABEL: v_pow_f32_fabs_lhs_rhs: 889; GFX11: ; %bb.0: 890; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 891; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 892; GFX11-NEXT: v_log_f32_e64 v0, |v0| 893; GFX11-NEXT: s_waitcnt_depctr 0xfff 894; GFX11-NEXT: v_mul_dx9_zero_f32_e64 v0, |v1|, v0 895; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 896; GFX11-NEXT: v_exp_f32_e32 v0, v0 897; GFX11-NEXT: s_setpc_b64 s[30:31] 898 %fabs.x = call float @llvm.fabs.f32(float %x) 899 %fabs.y = call float @llvm.fabs.f32(float %y) 900 %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y) 901 ret float %pow 902} 903 904define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) { 905; GFX6-LABEL: v_pow_f32_sgpr_vgpr: 906; GFX6: ; %bb.0: 907; GFX6-NEXT: v_log_f32_e32 v1, s0 908; GFX6-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 909; GFX6-NEXT: v_exp_f32_e32 v0, v0 910; GFX6-NEXT: ; return to shader part epilog 911; 912; GFX8-LABEL: v_pow_f32_sgpr_vgpr: 913; GFX8: ; %bb.0: 914; GFX8-NEXT: v_log_f32_e32 v1, s0 915; GFX8-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 916; GFX8-NEXT: v_exp_f32_e32 v0, v0 917; GFX8-NEXT: ; return to shader part epilog 918; 919; GFX9-LABEL: v_pow_f32_sgpr_vgpr: 920; GFX9: ; %bb.0: 921; GFX9-NEXT: v_log_f32_e32 v1, s0 922; GFX9-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 923; GFX9-NEXT: v_exp_f32_e32 v0, v0 924; GFX9-NEXT: ; return to shader part epilog 925; 926; GFX90A-LABEL: 
v_pow_f32_sgpr_vgpr: 927; GFX90A: ; %bb.0: 928; GFX90A-NEXT: v_log_f32_e32 v1, s0 929; GFX90A-NEXT: v_mul_legacy_f32 v0, v0, v1 930; GFX90A-NEXT: v_exp_f32_e32 v0, v0 931; GFX90A-NEXT: ; return to shader part epilog 932; 933; GFX10-LABEL: v_pow_f32_sgpr_vgpr: 934; GFX10: ; %bb.0: 935; GFX10-NEXT: v_log_f32_e32 v1, s0 936; GFX10-NEXT: v_mul_legacy_f32_e32 v0, v0, v1 937; GFX10-NEXT: v_exp_f32_e32 v0, v0 938; GFX10-NEXT: ; return to shader part epilog 939; 940; GFX11-LABEL: v_pow_f32_sgpr_vgpr: 941; GFX11: ; %bb.0: 942; GFX11-NEXT: v_log_f32_e32 v1, s0 943; GFX11-NEXT: s_waitcnt_depctr 0xfff 944; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, v0, v1 945; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 946; GFX11-NEXT: v_exp_f32_e32 v0, v0 947; GFX11-NEXT: ; return to shader part epilog 948 %pow = call float @llvm.pow.f32(float %x, float %y) 949 ret float %pow 950} 951 952define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) { 953; GFX6-LABEL: v_pow_f32_vgpr_sgpr: 954; GFX6: ; %bb.0: 955; GFX6-NEXT: v_log_f32_e32 v0, v0 956; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 957; GFX6-NEXT: v_exp_f32_e32 v0, v0 958; GFX6-NEXT: ; return to shader part epilog 959; 960; GFX8-LABEL: v_pow_f32_vgpr_sgpr: 961; GFX8: ; %bb.0: 962; GFX8-NEXT: v_log_f32_e32 v0, v0 963; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 964; GFX8-NEXT: v_exp_f32_e32 v0, v0 965; GFX8-NEXT: ; return to shader part epilog 966; 967; GFX9-LABEL: v_pow_f32_vgpr_sgpr: 968; GFX9: ; %bb.0: 969; GFX9-NEXT: v_log_f32_e32 v0, v0 970; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s0, v0 971; GFX9-NEXT: v_exp_f32_e32 v0, v0 972; GFX9-NEXT: ; return to shader part epilog 973; 974; GFX90A-LABEL: v_pow_f32_vgpr_sgpr: 975; GFX90A: ; %bb.0: 976; GFX90A-NEXT: v_log_f32_e32 v0, v0 977; GFX90A-NEXT: v_mul_legacy_f32 v0, s0, v0 978; GFX90A-NEXT: v_exp_f32_e32 v0, v0 979; GFX90A-NEXT: ; return to shader part epilog 980; 981; GFX10-LABEL: v_pow_f32_vgpr_sgpr: 982; GFX10: ; %bb.0: 983; GFX10-NEXT: v_log_f32_e32 v0, v0 984; GFX10-NEXT: 
v_mul_legacy_f32_e32 v0, s0, v0 985; GFX10-NEXT: v_exp_f32_e32 v0, v0 986; GFX10-NEXT: ; return to shader part epilog 987; 988; GFX11-LABEL: v_pow_f32_vgpr_sgpr: 989; GFX11: ; %bb.0: 990; GFX11-NEXT: v_log_f32_e32 v0, v0 991; GFX11-NEXT: s_waitcnt_depctr 0xfff 992; GFX11-NEXT: v_mul_dx9_zero_f32_e32 v0, s0, v0 993; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 994; GFX11-NEXT: v_exp_f32_e32 v0, v0 995; GFX11-NEXT: ; return to shader part epilog 996 %pow = call float @llvm.pow.f32(float %x, float %y) 997 ret float %pow 998} 999 1000define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) { 1001; GFX6-LABEL: v_pow_f32_sgpr_sgpr: 1002; GFX6: ; %bb.0: 1003; GFX6-NEXT: v_log_f32_e32 v0, s0 1004; GFX6-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 1005; GFX6-NEXT: v_exp_f32_e32 v0, v0 1006; GFX6-NEXT: ; return to shader part epilog 1007; 1008; GFX8-LABEL: v_pow_f32_sgpr_sgpr: 1009; GFX8: ; %bb.0: 1010; GFX8-NEXT: v_log_f32_e32 v0, s0 1011; GFX8-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 1012; GFX8-NEXT: v_exp_f32_e32 v0, v0 1013; GFX8-NEXT: ; return to shader part epilog 1014; 1015; GFX9-LABEL: v_pow_f32_sgpr_sgpr: 1016; GFX9: ; %bb.0: 1017; GFX9-NEXT: v_log_f32_e32 v0, s0 1018; GFX9-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 1019; GFX9-NEXT: v_exp_f32_e32 v0, v0 1020; GFX9-NEXT: ; return to shader part epilog 1021; 1022; GFX90A-LABEL: v_pow_f32_sgpr_sgpr: 1023; GFX90A: ; %bb.0: 1024; GFX90A-NEXT: v_log_f32_e32 v0, s0 1025; GFX90A-NEXT: v_mul_legacy_f32 v0, s1, v0 1026; GFX90A-NEXT: v_exp_f32_e32 v0, v0 1027; GFX90A-NEXT: ; return to shader part epilog 1028; 1029; GFX10-LABEL: v_pow_f32_sgpr_sgpr: 1030; GFX10: ; %bb.0: 1031; GFX10-NEXT: v_log_f32_e32 v0, s0 1032; GFX10-NEXT: v_mul_legacy_f32_e32 v0, s1, v0 1033; GFX10-NEXT: v_exp_f32_e32 v0, v0 1034; GFX10-NEXT: ; return to shader part epilog 1035; 1036; GFX11-LABEL: v_pow_f32_sgpr_sgpr: 1037; GFX11: ; %bb.0: 1038; GFX11-NEXT: v_log_f32_e32 v0, s0 1039; GFX11-NEXT: s_waitcnt_depctr 0xfff 1040; GFX11-NEXT: 
v_mul_dx9_zero_f32_e32 v0, s1, v0 1041; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1042; GFX11-NEXT: v_exp_f32_e32 v0, v0 1043; GFX11-NEXT: ; return to shader part epilog 1044 %pow = call float @llvm.pow.f32(float %x, float %y) 1045 ret float %pow 1046} 1047 1048declare half @llvm.pow.f16(half, half) 1049declare float @llvm.pow.f32(float, float) 1050declare double @llvm.pow.f64(double, double) 1051 1052declare half @llvm.fabs.f16(half) 1053declare float @llvm.fabs.f32(float) 1054 1055declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>) 1056declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>) 1057