; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): several check lines below use FileCheck variables (SREG, VREG,
; MUL1, ...) that look hand-edited; re-running the update script would
; presumably overwrite them -- confirm before regenerating.
;
; Checks how the llvm.exp.* intrinsics are lowered for amdgcn on three
; configurations: the default subtarget (prefix SI), fiji (prefix VI) and
; gfx900 (prefix GFX9). Checks under the GCN prefix are shared by all three.
;
; In every expected sequence the input is first multiplied by a constant and
; then fed to v_exp_*. The constants 0x3fb8aa3b (f32) and 0x3dc5 (f16) are the
; bit patterns of log2(e) ~= 1.4427.
; RUN: llc -mtriple=amdgcn-- < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=fiji < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

; Scalar f32: the expected code is identical on all three configurations, so
; it is checked once under the shared prefix.
define float @v_exp_f32(float %arg0) {
; GCN-LABEL: v_exp_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call float @llvm.exp.f32(float %arg0)
  ret float %result
}

; <2 x float>: the constant is materialized once in an SGPR and reused by both
; multiplies. The multiply operands are matched loosely with regexes.
define <2 x float> @v_exp_v2f32(<2 x float> %arg0) {
; GCN-LABEL: v_exp_v2f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x float> @llvm.exp.v2f32(<2 x float> %arg0)
  ret <2 x float> %result
}

; <3 x float>: same pattern as the two-element case with one more lane.
define <3 x float> @v_exp_v3f32(<3 x float> %arg0) {
; GCN-LABEL: v_exp_v3f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; GCN-NEXT:    v_exp_f32_e32 v0, v0
; GCN-NEXT:    v_exp_f32_e32 v1, v1
; GCN-NEXT:    v_exp_f32_e32 v2, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %result = call <3 x float> @llvm.exp.v3f32(<3 x float> %arg0)
  ret <3 x float> %result
}

; <4 x float>.
; NOTE(review): only the default-subtarget prefix has checks here, so the fiji
; and gfx900 runs do not verify this function at all. Presumably their
; instruction order differs from the fixed v0..v3 order below -- TODO confirm
; and add checks for the other two configurations.
define <4 x float> @v_exp_v4f32(<4 x float> %arg0) {
; SI-LABEL: v_exp_v4f32:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; SI-NEXT:    v_mul_f32_e32 v0, [[SREG]], v0
; SI-NEXT:    v_mul_f32_e32 v1, [[SREG]], v1
; SI-NEXT:    v_mul_f32_e32 v2, [[SREG]], v2
; SI-NEXT:    v_mul_f32_e32 v3, [[SREG]], v3
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    v_exp_f32_e32 v2, v2
; SI-NEXT:    v_exp_f32_e32 v3, v3
; SI-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x float> @llvm.exp.v4f32(<4 x float> %arg0)
  ret <4 x float> %result
}

; Scalar f16: the default subtarget is expected to round-trip through f32
; conversions and use the f32 instructions, while fiji and gfx900 are expected
; to use v_mul_f16 / v_exp_f16 directly.
define half @v_exp_f16(half %arg0) {
; SI-LABEL: v_exp_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, 0x3fb8aa3b, v0
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; VI-NEXT:    v_exp_f16_e32 v0, v0
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mul_f16_e32 v0, 0x3dc5, v0
; GFX9-NEXT:    v_exp_f16_e32 v0, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call half @llvm.exp.f16(half %arg0)
  ret half %result
}

; <2 x half>: fiji is expected to handle the high half with SDWA forms and OR
; the halves together; gfx900 is expected to use a packed multiply and then
; repack the two exp results with v_and / v_lshl_or.
define <2 x half> @v_exp_v2f16(<2 x half> %arg0) {
; SI-LABEL: v_exp_v2f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; SI-NEXT:    v_mul_f32_e32 v{{[0-9]+}}, [[SREG]], v{{[0-9]+}}
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v2f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; VI-NEXT:    v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
; VI-NEXT:    v_mul_f16_sdwa [[MUL1:v[0-9]+]], v{{[0-9]+}}, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v{{[0-9]+}}
; VI-NEXT:    v_exp_f16_sdwa [[MUL1]], [[MUL1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 [[MUL2]], [[MUL2]]
; VI-NEXT:    v_or_b32_e32 v{{[0-9]+}}, [[MUL2]], [[MUL1]]
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v2f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; GFX9-NEXT:    v_pk_mul_f16 v0, v0, [[SREG]] op_sel_hi:[1,0]
; GFX9-NEXT:    v_exp_f16_e32 v1, v0
; GFX9-NEXT:    v_exp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
; GFX9-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX9-NEXT:    v_lshl_or_b32 v0, v0, 16, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <2 x half> @llvm.exp.v2f16(<2 x half> %arg0)
  ret <2 x half> %result
}

; Disabled test for <3 x half>; the matching declaration is still present at
; the end of the file.
; NOTE(review): the reason it is disabled is not recorded here -- TODO confirm
; whether it can be re-enabled.
; define <3 x half> @v_exp_v3f16(<3 x half> %arg0) {
;   %result = call <3 x half> @llvm.exp.v3f16(<3 x half> %arg0)
;   ret <3 x half> %result
; }

; <4 x half>.
; NOTE(review): in the fiji checks MUL3 and MUL4 are captured but never used;
; the following SDWA exp lines name v1 and v0 literally instead, so they rely
; on the SDWA multiplies writing back to v1 / v0 -- confirm and consider using
; the captured names.
define <4 x half> @v_exp_v4f16(<4 x half> %arg0) {
; SI-LABEL: v_exp_v4f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT:    v_cvt_f16_f32_e32 v3, v3
; SI-NEXT:    v_cvt_f16_f32_e32 v2, v2
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    s_mov_b32 [[SREG:s[0-9]+]], 0x3fb8aa3b
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v3
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_mul_f32_e32 v0, [[SREG]], v0
; SI-NEXT:    v_mul_f32_e32 v1, [[SREG]], v1
; SI-NEXT:    v_mul_f32_e32 v2, [[SREG]], v2
; SI-NEXT:    v_mul_f32_e32 v3, [[SREG]], v3
; SI-NEXT:    v_exp_f32_e32 v0, v0
; SI-NEXT:    v_exp_f32_e32 v1, v1
; SI-NEXT:    v_exp_f32_e32 v2, v2
; SI-NEXT:    v_exp_f32_e32 v3, v3
; SI-NEXT:    s_setpc_b64 s[30:31]
;
; VI-LABEL: v_exp_v4f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; VI-NEXT:    v_mov_b32_e32 [[VREG:v[0-9]+]], [[SREG]]
; VI-NEXT:    v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
; VI-NEXT:    v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
; VI-NEXT:    v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[VREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
; VI-NEXT:    v_exp_f16_sdwa [[EXP2:v[0-9]+]], v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL2]]
; VI-NEXT:    v_exp_f16_sdwa [[EXP4:v[0-9]+]], v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
; VI-NEXT:    v_or_b32_e32 v1, [[EXP1]], [[EXP2]]
; VI-NEXT:    v_or_b32_e32 v0, [[EXP3]], [[EXP4]]
; VI-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_exp_v4f16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_movk_i32 [[SREG:s[0-9]+]], 0x3dc5
; GFX9-NEXT:    v_mul_f16_e32 [[MUL1:v[0-9]+]], [[SREG]], v1
; GFX9-NEXT:    v_mul_f16_e32 [[MUL2:v[0-9]+]], [[SREG]], v0
; GFX9-NEXT:    v_mul_f16_sdwa [[MUL3:v[0-9]+]], v1, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_mul_f16_sdwa [[MUL4:v[0-9]+]], v0, [[SREG]] dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT:    v_exp_f16_e32 [[EXP1:v[0-9]+]], [[MUL1]]
; GFX9-NEXT:    v_exp_f16_e32 [[EXP2:v[0-9]+]], [[MUL2]]
; GFX9-NEXT:    v_exp_f16_e32 [[EXP3:v[0-9]+]], [[MUL4]]
; GFX9-NEXT:    v_exp_f16_e32 [[EXP4:v[0-9]+]], [[MUL3]]
; GFX9-NEXT:    v_mov_b32_e32 [[VCONST:v[0-9]+]], 0xffff
; GFX9-NEXT:    v_and_b32_e32 [[AND1:v[0-9]+]], [[VCONST]], [[EXP2]]
; GFX9-NEXT:    v_and_b32_e32 [[AND2:v[0-9]+]], [[VCONST]], [[EXP1]]
; GFX9-NEXT:    v_lshl_or_b32 v0, [[EXP3]], 16, [[AND1]]
; GFX9-NEXT:    v_lshl_or_b32 v1, [[EXP4]], 16, [[AND2]]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %result = call <4 x half> @llvm.exp.v4f16(<4 x half> %arg0)
  ret <4 x half> %result
}

; Intrinsic declarations used by the functions above.
declare float @llvm.exp.f32(float)
declare <2 x float> @llvm.exp.v2f32(<2 x float>)
declare <3 x float> @llvm.exp.v3f32(<3 x float>)
declare <4 x float> @llvm.exp.v4f32(<4 x float>)

declare half @llvm.exp.f16(half)
declare <2 x half> @llvm.exp.v2f16(<2 x half>)
declare <3 x half> @llvm.exp.v3f16(<3 x half>)
declare <4 x half> @llvm.exp.v4f16(<4 x half>)