; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
;RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=SI %s
;RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=VI %s
;RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s

; Codegen test for the llvm.exp intrinsic on amdgcn, covering float and half
; in scalar and vector forms. In every expected sequence below the input is
; first multiplied by a constant (0x3fb8aa3b for f32, 0x3dc5 for f16; both
; are the encodings of approximately 1.4427, i.e. log2(e)) and the product is
; then fed to a v_exp_f32 / v_exp_f16 instruction.
;
; The three llc invocations above differ only in -mcpu (none, fiji, gfx900).
; The float cases expect identical code from all three and therefore use the
; shared prefix; the half cases expect different code per invocation.

; Scalar float.
define float @v_exp_f32(float %arg0) {
; GCN-LABEL: v_exp_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.exp.f32(float %arg0)
  ret float %result
}

; Two-element float vector: expected to be handled one element at a time.
define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
; GCN-LABEL: v_exp_v2f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
  ret <2 x float> %result
}

; Three-element float vector.
define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
; GCN-LABEL: v_exp_v3f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; GCN-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    v_exp_f32_e32 v2, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
  ret <3 x float> %result
}

; Four-element float vector.
define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
; GCN-LABEL: v_exp_v4f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; GCN-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
; GCN-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v3
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    v_exp_f32_e32 v2, v2
; GCN-NEXT:    v_exp_f32_e32 v3, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
  ret <4 x float> %result
}

; Scalar half. The run without -mcpu expects f16<->f32 conversions around an
; f32 multiply and v_exp_f32; the fiji and gfx900 runs expect native f16
; instructions.
define half @v_exp_f16(half %arg0) {
; SI-LABEL: v_exp_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; VI-NEXT:    v_exp_f16_e32 v0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; GFX9-NEXT:    v_exp_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call half @llvm.exp.f16(half %arg0)
  ret half %result
}

; Two-element half vector. The fiji run expects SDWA forms to reach the high
; 16 bits; the gfx900 run expects a packed multiply (v_pk_mul_f16) followed
; by two scalar exps and a v_pack_b32_f16.
define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
; SI-LABEL: v_exp_v2f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v2f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, 0x3dc5
; VI-NEXT:    v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 v0, v0
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
; GFX9-NEXT:    v_exp_f16_e32 v1, v0
; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
  ret <2 x half> %result
}

; NOTE(review): the <3 x half> case below is commented out and has no
; expected output, although its intrinsic is still declared at the end of the
; file. The reason is not recorded here; confirm before re-enabling.
; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
;   ret <3 x half> %result
; }

; Four-element half vector.
define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
; SI-LABEL: v_exp_v4f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_mul_f32_e32 v1, 0x3fb8aa3b, v1
; SI-NEXT:    v_mul_f32_e32 v2, 0x3fb8aa3b, v2
; SI-NEXT:    v_mul_f32_e32 v3, 0x3fb8aa3b, v3
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    v_exp_f32_e32 v2, v2
; SI-NEXT:    v_exp_f32_e32 v3, v3
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v4f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v3, 0x3dc5
; VI-NEXT:    v_mul_f16_e32 v2, 0x3dc5, v1
; VI-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_e32 v4, 0x3dc5, v0
; VI-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 v2, v2
; VI-NEXT:    v_exp_f16_e32 v4, v4
; VI-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_or_b32_e32 v0, v4, v0
; VI-NEXT:    v_or_b32_e32 v1, v2, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
; GFX9-NEXT:    v_mul_f16_e32 v2, 0x3dc5, v1
; GFX9-NEXT:    v_mul_f16_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_mul_f16_e32 v3, 0x3dc5, v0
; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_exp_f16_e32 v2, v2
; GFX9-NEXT:    v_exp_f16_e32 v3, v3
; GFX9-NEXT:    v_exp_f16_e32 v0, v0
; GFX9-NEXT:    v_exp_f16_e32 v1, v1
; GFX9-NEXT:    v_pack_b32_f16 v0, v3, v0
; GFX9-NEXT:    v_pack_b32_f16 v1, v2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
  ret <4 x half> %result
}

; Intrinsic declarations for every overload referenced above.
declare float @llvm.exp.f32(float)
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
declare <3 x float> @llvm.exp.v3f32(<3 x float>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)

declare half @llvm.exp.f16(half)
declare <2 x half> @llvm.exp.v2f16(<2 x half>)
declare <3 x half> @llvm.exp.v3f16(<3 x half>)
declare <4 x half> @llvm.exp.v4f16(<4 x half>)