; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.

; These tests check fdiv lowering when using unsafe_fp_math / fast-math flags,
; coarse fp div (the 2.5 ULP !fpmath metadata), and IEEE754 fp div.

; Plain scalar fdiv: no fast-math flags, no !fpmath, attribute group #0
; (fp32 denormals disabled). The amdgcn checks expect the full
; div_scale / rcp / fma / div_fmas / div_fixup sequence.
; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI: v_div_scale_f32
; SI-DAG: v_div_scale_f32

; SI-DAG: v_rcp_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_mul_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_fma_f32
; SI: v_div_fmas_f32
; SI: v_div_fixup_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Scalar fdiv tagged with !fpmath !0 (2.5) under attribute group #0
; (fp32 denormals disabled). The amdgcn checks expect a cndmask / mul / rcp /
; mul / mul sequence.
; FUNC-LABEL: {{^}}fdiv_25ulp_f32:
; SI: v_cndmask_b32
; SI: v_mul_f32
; SI: v_rcp_f32
; SI: v_mul_f32
; SI: v_mul_f32
define void @fdiv_25ulp_f32(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv float %a, %b, !fpmath !0
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; Use correct fdiv
; Same !fpmath !0 division as above, but under attribute group #2
; (fp32 denormals enabled): the checks expect fma / div_fmas / div_fixup.
; FUNC-LABEL: {{^}}fdiv_25ulp_denormals_f32:
; SI: v_fma_f32
; SI: v_div_fmas_f32
; SI: v_div_fixup_f32
define void @fdiv_25ulp_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
  %fdiv = fdiv float %a, %b, !fpmath !0
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; fdiv with the 'fast' flag under attribute group #2 (fp32 denormals enabled).
; The checks expect a single rcp feeding a single mul whose result register is
; not mentioned again before it is stored.
; FUNC-LABEL: {{^}}fdiv_fast_denormals_f32:
; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @fdiv_fast_denormals_f32(float addrspace(1)* %out, float %a, float %b) #2 {
entry:
  %fdiv = fdiv fast float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; fdiv with the 'fast' flag under attribute group #0 (fp32 denormals disabled).
; Both targets are expected to produce a reciprocal followed by a multiply.
; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv fast float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; fdiv with only the 'arcp' flag under attribute group #0. The expected output
; is the same reciprocal + multiply pattern as the 'fast' variant.
; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; SI: v_rcp_f32_e32 [[RCP:v[0-9]+]], s{{[0-9]+}}
; SI: v_mul_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}, [[RCP]]
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) #0 {
entry:
  %fdiv = fdiv arcp float %a, %b
  store float %fdiv, float addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with no flags under attribute group #0. The amdgcn checks
; expect four v_div_scale_f32 instructions.
; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI: v_div_scale_f32
; SI: v_div_scale_f32
; SI: v_div_scale_f32
; SI: v_div_scale_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with 'arcp' plus !fpmath !0 (2.5) under attribute group #0.
; The amdgcn checks expect two v_cmp_gt_f32 instructions.
; FUNC-LABEL: {{^}}fdiv_ulp25_v2f32:
; SI: v_cmp_gt_f32
; SI: v_cmp_gt_f32
define void @fdiv_ulp25_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv arcp <2 x float> %a, %b, !fpmath !0
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with the 'fast' flag under attribute group #0. The amdgcn
; checks expect two v_rcp_f32 instructions.
; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv fast <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> fdiv with only the 'arcp' flag under attribute group #0. The
; amdgcn checks expect two v_rcp_f32 instructions.
; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #0 {
entry:
  %fdiv = fdiv arcp <2 x float> %a, %b
  store <2 x float> %fdiv, <2 x float> addrspace(1)* %out
  ret void
}

; <4 x float> fdiv with no flags under attribute group #0. The operands are
; loaded from %in[0] and %in[1]. The amdgcn checks expect four
; v_div_fixup_f32 instructions.
; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI: v_div_fixup_f32
; SI: v_div_fixup_f32
; SI: v_div_fixup_f32
; SI: v_div_fixup_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1)* %in
  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr
  %result = fdiv <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> fdiv with the 'fast' flag under attribute group #0. The operands
; are loaded from %in[0] and %in[1]. The amdgcn checks expect four v_rcp_f32
; instructions.
; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1)* %in
  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr
  %result = fdiv fast <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> fdiv with only the 'arcp' flag under attribute group #0. The
; operands are loaded from %in[0] and %in[1]. The amdgcn checks expect four
; v_rcp_f32 instructions.
; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
; SI: v_rcp_f32
define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
  %b_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %a = load <4 x float>, <4 x float> addrspace(1)* %in
  %b = load <4 x float>, <4 x float> addrspace(1)* %b_ptr
  %result = fdiv arcp <4 x float> %a, %b
  store <4 x float> %result, <4 x float> addrspace(1)* %out
  ret void
}

; Attribute groups:
;   #0 - unsafe-fp-math off, fp32 denormals disabled.
;   #1 - unsafe-fp-math on,  fp32 denormals disabled.
;        NOTE(review): no function in this file references #1 - confirm
;        whether an unsafe-fp-math test was intended.
;   #2 - unsafe-fp-math off, fp32 denormals enabled.
attributes #0 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
attributes #1 = { nounwind "enable-unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
attributes #2 = { nounwind "enable-unsafe-fp-math"="false" "target-features"="+fp32-denormals" }

; Operand of the !fpmath attachments above: 2.5.
!0 = !{float 2.500000e+00}