; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Codegen test for the llvm.exp.* intrinsics with float and half operands, in
; scalar and vector forms. Three llc configurations are checked: the amdgcn
; triple with no -mcpu (prefix SI), -mcpu=fiji (prefix VI) and -mcpu=gfx900
; (prefix GFX9). Expectations shared by all three use the common GCN prefix.
;
; Every f32 expectation multiplies the input by 0x3fb8aa3b before v_exp_f32;
; read as an IEEE single that bit pattern is ~1.442695, i.e. log2(e). The f16
; expectations use 0x3dc5, which is the same value rounded to half precision.
;
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Scalar float: one multiply by the constant, then one v_exp_f32.
define float @v_exp_f32(float %arg0) {
; GCN-LABEL: v_exp_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.exp.f32(float %arg0)
  ret float %result
}

; Two-element float vector: the constant is materialized once in s4 and the
; multiply / v_exp_f32 pair is repeated per element.
define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
; GCN-LABEL: v_exp_v2f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
; GCN-NEXT:    v_mul_f32_e32 v0, s4, v0
; GCN-NEXT:    v_mul_f32_e32 v1, s4, v1
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
  ret <2 x float> %result
}

; Three-element float vector: same per-element sequence as the two-element case.
define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
; GCN-LABEL: v_exp_v3f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
; GCN-NEXT:    v_mul_f32_e32 v0, s4, v0
; GCN-NEXT:    v_mul_f32_e32 v1, s4, v1
; GCN-NEXT:    v_mul_f32_e32 v2, s4, v2
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    v_exp_f32_e32 v2, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
  ret <3 x float> %result
}

; Four-element float vector: same per-element sequence, four times.
define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
; GCN-LABEL: v_exp_v4f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
; GCN-NEXT:    v_mul_f32_e32 v0, s4, v0
; GCN-NEXT:    v_mul_f32_e32 v1, s4, v1
; GCN-NEXT:    v_mul_f32_e32 v2, s4, v2
; GCN-NEXT:    v_mul_f32_e32 v3, s4, v3
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    v_exp_f32_e32 v2, v2
; GCN-NEXT:    v_exp_f32_e32 v3, v3
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
  ret <4 x float> %result
}

; Scalar half. The SI expectations round-trip the value through
; v_cvt_f16_f32 / v_cvt_f32_f16 and then reuse the f32 sequence; the VI and
; GFX9 expectations use v_mul_f16 with 0x3dc5 followed by v_exp_f16.
define half @v_exp_f16(half %arg0) {
; SI-LABEL: v_exp_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; VI-NEXT:    v_exp_f16_e32 v0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; GFX9-NEXT:    v_exp_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call half @llvm.exp.f16(half %arg0)
  ret half %result
}

; Two-element half vector. The VI expectations use the _sdwa encodings with
; WORD_1 selects and merge the two results with v_or_b32; the GFX9
; expectations use v_pk_mul_f16 for the multiply and v_pack_b32_f16 to
; combine the two v_exp_f16 results.
define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
; SI-LABEL: v_exp_v2f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, s4, v0
; SI-NEXT:    v_mul_f32_e32 v1, s4, v1
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v2f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_movk_i32 s4, 0x3dc5
; VI-NEXT:    v_mov_b32_e32 v1, s4
; VI-NEXT:    v_mul_f16_sdwa v1, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_e32 v0, s4, v0
; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 v0, v0
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, s4 op_sel_hi:[1,0]
; GFX9-NEXT:    v_exp_f16_e32 v1, v0
; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
  ret <2 x half> %result
}

; NOTE(review): the <3 x half> case below is commented out and this file does
; not record why. Its intrinsic declaration is still present at the end of the
; file. Confirm whether the case can be re-enabled, then regenerate the checks.
; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
;   ret <3 x half> %result
; }

; Four-element half vector. The SI expectations convert all four lanes and use
; the f32 sequence; the VI expectations pair plain and _sdwa f16 operations and
; merge with v_or_b32; the GFX9 expectations use _sdwa multiplies for the high
; halves and v_pack_b32_f16 to rebuild each 32-bit register.
define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
; SI-LABEL: v_exp_v4f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    s_mov_b32 s4, 0x3fb8aa3b
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, s4, v0
; SI-NEXT:    v_mul_f32_e32 v1, s4, v1
; SI-NEXT:    v_mul_f32_e32 v2, s4, v2
; SI-NEXT:    v_mul_f32_e32 v3, s4, v3
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    v_exp_f32_e32 v2, v2
; SI-NEXT:    v_exp_f32_e32 v3, v3
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v4f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_movk_i32 s4, 0x3dc5
; VI-NEXT:    v_mov_b32_e32 v3, s4
; VI-NEXT:    v_mul_f16_e32 v2, s4, v1
; VI-NEXT:    v_mul_f16_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_e32 v4, s4, v0
; VI-NEXT:    v_mul_f16_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 v2, v2
; VI-NEXT:    v_exp_f16_e32 v4, v4
; VI-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_or_b32_e32 v0, v4, v0
; VI-NEXT:    v_or_b32_e32 v1, v2, v1
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 s4, 0x3dc5
; GFX9-NEXT:    v_mul_f16_e32 v2, s4, v1
; GFX9-NEXT:    v_mul_f16_sdwa v1, v1, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_mul_f16_e32 v3, s4, v0
; GFX9-NEXT:    v_mul_f16_sdwa v0, v0, s4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_exp_f16_e32 v2, v2
; GFX9-NEXT:    v_exp_f16_e32 v3, v3
; GFX9-NEXT:    v_exp_f16_e32 v0, v0
; GFX9-NEXT:    v_exp_f16_e32 v1, v1
; GFX9-NEXT:    v_pack_b32_f16 v0, v3, v0
; GFX9-NEXT:    v_pack_b32_f16 v1, v2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
  ret <4 x half> %result
}

; Intrinsic declarations for every overload referenced above.
declare float @llvm.exp.f32(float)
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
declare <3 x float> @llvm.exp.v3f32(<3 x float>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)

declare half @llvm.exp.f16(half)
declare <2 x half> @llvm.exp.v2f16(<2 x half>)
declare <3 x half> @llvm.exp.v3f16(<3 x half>)
declare <4 x half> @llvm.exp.v4f16(<4 x half>)