; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck -check-prefix=GFX90A %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s

; Codegen checks for the llvm.pow intrinsic on AMDGPU (f32, v2f32, f16, v2f16).
; For every target the expected code is a v_log_f32 of the first operand, a
; v_mul_legacy_f32 with the second operand, and a v_exp_f32 of the product.
;
; The target is selected with -mtriple rather than -march so that the vendor
; and OS components of the triple do not depend on the host's default triple.
;
; The check lines are generated; after a codegen change rerun
; utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar f32 pow with both operands passed in VGPRs.
define float @v_pow_f32(float %x, float %y) {
; GFX6-LABEL: v_pow_f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; <2 x float> pow: the checks expect the scalar sequence once per element.
define <2 x float> @v_pow_v2f32(<2 x float> %x, <2 x float> %y) {
; GFX6-LABEL: v_pow_v2f32:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f32:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_log_f32_e32 v1, v1
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_exp_f32_e32 v1, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_log_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_exp_f32_e32 v1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f32:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_log_f32_e32 v1, v1
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v2, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v1, v3, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v1, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f32:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_log_f32_e32 v1, v1
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x float> @llvm.pow.v2f32(<2 x float> %x, <2 x float> %y)
  ret <2 x float> %pow
}

; Scalar f16 pow: the checks expect the operands to be converted to f32 and
; the f32 log/mul/exp sequence to be used. The tahiti checks convert the
; incoming values f32->f16->f32 up front and have no final convert to f16;
; the other targets convert the result back with v_cvt_f16_f32.
define half @v_pow_f16(half %x, half %y) {
; GFX6-LABEL: v_pow_f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call half @llvm.pow.f16(half %x, half %y)
  ret half %pow
}

; <2 x half> pow: each half is widened to f32 (SDWA WORD_1 select for the high
; half on fiji and newer), run through log/mul/exp, and the two results are
; repacked (v_or_b32 on fiji, v_pack_b32_f16 on gfx900 and newer).
define <2 x half> @v_pow_v2f16(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with a negated first operand. On fiji and newer the checks
; expect the fneg to be folded into the f16->f32 converts as a "-v0" source
; modifier; on tahiti they expect an explicit xor with 0x80008000.
define <2 x half> @v_pow_v2f16_fneg_lhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_log_f32_e32 v4, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v3
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v1, v4
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y)
  ret <2 x half> %pow
}

; <2 x half> pow with a negated second operand; same expectations as the
; negated-first-operand test, with the "-v1" modifier on the second operand's
; converts instead.
define <2 x half> @v_pow_v2f16_fneg_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, 0x80008000, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; <2 x half> pow with both operands negated. The tahiti checks expect the
; 0x80008000 sign mask to be materialized once in s4 and used by both xors.
define <2 x half> @v_pow_v2f16_fneg_lhs_rhs(<2 x half> %x, <2 x half> %y) {
; GFX6-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX6-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f16_f32_e32 v3, v3
; GFX6-NEXT:    v_cvt_f16_f32_e32 v2, v2
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    s_mov_b32 s4, 0x80008000
; GFX6-NEXT:    v_xor_b32_e32 v0, s4, v0
; GFX6-NEXT:    v_lshrrev_b32_e32 v1, 16, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_cvt_f32_f16_e32 v1, v1
; GFX6-NEXT:    v_cvt_f32_f16_e32 v0, v0
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_xor_b32_e32 v2, s4, v2
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 16, v2
; GFX6-NEXT:    v_cvt_f32_f16_e32 v2, v2
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_cvt_f32_f16_e32 v3, v3
; GFX6-NEXT:    v_log_f32_e32 v1, v1
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v2, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v1, v3, v1
; GFX6-NEXT:    v_exp_f32_e32 v1, v1
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX8-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX8-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX8-NEXT:    v_log_f32_e32 v2, v2
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v2, v2
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; GFX8-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX8-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX9-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX9-NEXT:    v_log_f32_e32 v2, v2
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v2, v2
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX90A-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX90A-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX90A-NEXT:    v_log_f32_e32 v2, v2
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v2, v3, v2
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v2, v2
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v1, v2
; GFX90A-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX90A-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_v2f16_fneg_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v2, -v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v0, -v0
; GFX10-NEXT:    v_cvt_f32_f16_sdwa v3, -v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX10-NEXT:    v_cvt_f32_f16_e64 v1, -v1
; GFX10-NEXT:    v_log_f32_e32 v2, v2
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v2, v3, v2
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v1, v2
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    v_cvt_f16_f32_e32 v1, v1
; GFX10-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX10-NEXT:    v_pack_b32_f16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %x.fneg = fneg <2 x half> %x
  %y.fneg = fneg <2 x half> %y
  %pow = call <2 x half> @llvm.pow.v2f16(<2 x half> %x.fneg, <2 x half> %y.fneg)
  ret <2 x half> %pow
}

; FIXME
; The f64 variant below is disabled and has no check lines. The reason is not
; recorded in this file; presumably f64 pow is not handled by the backend yet
; (confirm before enabling). @llvm.pow.f64 stays declared at the end of the
; file so the test can be re-enabled by uncommenting it.
; define double @v_pow_f64(double %x, double %y) {
;   %pow = call double @llvm.pow.f64(double %x, double %y)
;   ret double %pow
; }

; f32 pow with fabs on the first operand: the checks expect the fabs to be
; folded into v_log_f32 as a |v0| source modifier.
define float @v_pow_f32_fabs_lhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %y)
  ret float %pow
}

; f32 pow with fabs on the second operand: the checks expect the fabs to be
; folded into v_mul_legacy_f32 as a |v1| source modifier.
define float @v_pow_f32_fabs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %x, float %fabs.y)
  ret float %pow
}

; f32 pow with fabs on both operands: both source modifiers are expected.
define float @v_pow_f32_fabs_lhs_rhs(float %x, float %y) {
; GFX6-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_log_f32_e64 v0, |v0|
; GFX6-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_log_f32_e64 v0, |v0|
; GFX8-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_log_f32_e64 v0, |v0|
; GFX9-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_log_f32_e64 v0, |v0|
; GFX90A-NEXT:    v_mul_legacy_f32 v0, |v1|, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_pow_f32_fabs_lhs_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GFX10-NEXT:    v_log_f32_e64 v0, |v0|
; GFX10-NEXT:    v_mul_legacy_f32_e64 v0, |v1|, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
  %fabs.x = call float @llvm.fabs.f32(float %x)
  %fabs.y = call float @llvm.fabs.f32(float %y)
  %pow = call float @llvm.pow.f32(float %fabs.x, float %fabs.y)
  ret float %pow
}

; amdgpu_ps variant with the first operand marked inreg: the checks expect it
; to be read directly from s0 by v_log_f32.
define amdgpu_ps float @v_pow_f32_sgpr_vgpr(float inreg %x, float %y) {
; GFX6-LABEL: v_pow_f32_sgpr_vgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v1, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_vgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v1, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_vgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v1, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_vgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v1, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, v0, v1
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_vgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v1, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, v0, v1
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with the second operand marked inreg: the checks expect it
; to be read directly from s0 by v_mul_legacy_f32.
define amdgpu_ps float @v_pow_f32_vgpr_sgpr(float %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_vgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, v0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_vgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, v0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_vgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, v0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_vgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, v0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s0, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_vgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, v0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s0, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; amdgpu_ps variant with both operands marked inreg: the checks expect s0 to
; feed v_log_f32 and s1 to feed v_mul_legacy_f32.
define amdgpu_ps float @v_pow_f32_sgpr_sgpr(float inreg %x, float inreg %y) {
; GFX6-LABEL: v_pow_f32_sgpr_sgpr:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_log_f32_e32 v0, s0
; GFX6-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX6-NEXT:    v_exp_f32_e32 v0, v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: v_pow_f32_sgpr_sgpr:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_log_f32_e32 v0, s0
; GFX8-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX8-NEXT:    v_exp_f32_e32 v0, v0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: v_pow_f32_sgpr_sgpr:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_log_f32_e32 v0, s0
; GFX9-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX9-NEXT:    v_exp_f32_e32 v0, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX90A-LABEL: v_pow_f32_sgpr_sgpr:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    v_log_f32_e32 v0, s0
; GFX90A-NEXT:    v_mul_legacy_f32 v0, s1, v0
; GFX90A-NEXT:    v_exp_f32_e32 v0, v0
; GFX90A-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: v_pow_f32_sgpr_sgpr:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_log_f32_e32 v0, s0
; GFX10-NEXT:    v_mul_legacy_f32_e32 v0, s1, v0
; GFX10-NEXT:    v_exp_f32_e32 v0, v0
; GFX10-NEXT:    ; return to shader part epilog
  %pow = call float @llvm.pow.f32(float %x, float %y)
  ret float %pow
}

; Intrinsic declarations. @llvm.pow.f64 is referenced only by the disabled
; f64 test above; @llvm.fabs.f16 is not referenced by any test in this file.
declare half @llvm.pow.f16(half, half)
declare float @llvm.pow.f32(float, float)
declare double @llvm.pow.f64(double, double)

declare half @llvm.fabs.f16(half)
declare float @llvm.fabs.f32(float)

declare <2 x half> @llvm.pow.v2f16(<2 x half>, <2 x half>)
declare <2 x float> @llvm.pow.v2f32(<2 x float>, <2 x float>)