; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -march=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -march=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -march=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Code generation checks for the llvm.pow intrinsic on five AMDGPU subtargets
; (tahiti, fiji, gfx900, gfx90a and gfx1010). In every expected sequence in
; this file pow(x, y) is expanded inline to v_log_f32 of x, a v_mul_legacy_f32
; by y, and a v_exp_f32 of the product. The individual tests vary the operand
; type, the source modifiers (fneg / fabs) and the register class of the inputs.

; Baseline scalar f32 case. The expected code takes x in v0 and y in v1 and
; returns the result in v0.
define float @v_pow_f32(float %x, float %y) {
; GFX6-LABEL: v_pow_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; <2 x float> case. The expected code applies the log / mul_legacy / exp
; expansion to each element separately (x in v0-v1, y in v2-v3).
define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6-LABEL: v_pow_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_log_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_log_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_log_f32_e32 v1, v1
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v2, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v1, v3, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_log_f32_e32 v1, v1
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %pow
}

; Scalar half case. The expected code performs the expansion in f32. On tahiti
; the inputs are first rounded through f16 (v_cvt_f16_f32 then v_cvt_f32_f16)
; and the result is left as f32; on the other four targets the inputs are
; extended with v_cvt_f32_f16 and the result is converted back with
; v_cvt_f16_f32.
define half @v_pow_f16(half %x, half %y) {
; GFX6-LABEL: v_pow_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call half @llvm.pow.f16(half %x, half %y)
  ret half %pow
}

; <2 x half> case. On tahiti the expected code handles each element in its own
; register (v0-v3). On the other targets each operand is one packed register:
; the high half is extracted with an SDWA conversion (src0_sel:WORD_1), both
; halves are computed in f32, and the results are repacked with v_or_b32 on
; fiji or v_pack_b32_f16 on gfx900, gfx90a and gfx1010.
define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> with the base negated. On tahiti the expected code packs the two
; halves of x and flips both sign bits with a v_xor_b32 by 0x80008000. On the
; other targets the negation appears as a "-v0" source modifier on the
; f16-to-f32 conversions of x.
define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v4, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> with the exponent negated. Mirrors the fneg_lhs test: tahiti is
; expected to xor the packed y with 0x80008000, the other targets to apply a
; "-v1" source modifier on the f16-to-f32 conversions of y.
define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; <2 x half> with both operands negated. Tahiti is expected to xor both packed
; operands with 0x80008000; the other targets are expected to use "-v0" and
; "-v1" source modifiers on all four f16-to-f32 conversions.
define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; FIXME
; The f64 variant below is commented out, so it has no generated assertions.
; NOTE(review): this file does not record why it is disabled - confirm before
; re-enabling.
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }

; f32 with fabs applied to the base. On every target the expected code carries
; the absolute value as a "|v0|" source modifier on v_log_f32 (e64 encoding)
; rather than as a separate instruction.
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
  ret float %pow
}

; f32 with fabs applied to the exponent. The expected code carries the
; absolute value as a "|v1|" source modifier on v_mul_legacy_f32.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
  ret float %pow
}

; f32 with fabs applied to both operands. The expected code uses "|v0|" on
; v_log_f32 and "|v1|" on v_mul_legacy_f32.
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
  ret float %pow
}

; amdgpu_ps function with the base marked inreg. The expected code reads the
; base straight from s0 as the v_log_f32 operand and takes y from v0. Unlike
; the tests above, no leading s_waitcnt or trailing s_setpc_b64 is expected.
define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v1, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v1, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v1, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v1, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v0, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v1, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps function with the exponent marked inreg. The expected code takes x
; from v0 and uses s0 directly as the first v_mul_legacy_f32 source.
define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_vgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps function with both operands marked inreg. The expected code reads
; x from s0 in v_log_f32 and y from s1 in v_mul_legacy_f32, with no copies into
; VGPRs beforehand.
define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; Intrinsic declarations. llvm.pow.f64 is referenced only by the commented-out
; f64 test, and llvm.fabs.f16 is not called by any function in this file.
declare half @llvm.pow.f16(half, half)
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)

declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)

declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)