; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X86 --check-prefix=X86-AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VLDQ

define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X86-AVX-LABEL: fabs_v2f64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v2f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v2f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to2}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-AVX-LABEL: fabs_v2f64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v2f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v2f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to2}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X86-AVX-LABEL: fabs_v4f32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v4f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %xmm0, %xmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-AVX-LABEL: fabs_v4f32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v4f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X86-AVX-LABEL: fabs_v4f64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v4f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v4f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to4}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-AVX-LABEL: fabs_v4f64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v4f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v4f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X86-AVX-LABEL: fabs_v8f32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v8f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %ymm0, %ymm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-AVX-LABEL: fabs_v8f32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v8f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %ymm0, %ymm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X86-AVX-LABEL: fabs_v8f64:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X86-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v8f64:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v8f64:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}{1to8}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-AVX-LABEL: fabs_v8f64:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN]
; X64-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v8f64:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v8f64:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to8}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

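; Note (taken from the checks above and below): without AVX512, the 512-bit
; cases (fabs_v8f64, fabs_v16f32) are legalized as two 256-bit ANDs that reuse
; a single sign-mask register, while the AVX512VL/AVX512VLDQ variants fold the
; mask into one 512-bit AND via an embedded broadcast memory operand.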
define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X86-AVX-LABEL: fabs_v16f32:
; X86-AVX: # %bb.0:
; X86-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X86-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X86-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X86-AVX-NEXT: retl
;
; X86-AVX512VL-LABEL: fabs_v16f32:
; X86-AVX512VL: # %bb.0:
; X86-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VL-NEXT: retl
;
; X86-AVX512VLDQ-LABEL: fabs_v16f32:
; X86-AVX512VLDQ: # %bb.0:
; X86-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}{1to16}, %zmm0, %zmm0
; X86-AVX512VLDQ-NEXT: retl
;
; X64-AVX-LABEL: fabs_v16f32:
; X64-AVX: # %bb.0:
; X64-AVX-NEXT: vmovaps {{.*#+}} ymm2 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
; X64-AVX-NEXT: vandps %ymm2, %ymm0, %ymm0
; X64-AVX-NEXT: vandps %ymm2, %ymm1, %ymm1
; X64-AVX-NEXT: retq
;
; X64-AVX512VL-LABEL: fabs_v16f32:
; X64-AVX512VL: # %bb.0:
; X64-AVX512VL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VL-NEXT: retq
;
; X64-AVX512VLDQ-LABEL: fabs_v16f32:
; X64-AVX512VLDQ: # %bb.0:
; X64-AVX512VLDQ-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to16}, %zmm0, %zmm0
; X64-AVX512VLDQ-NEXT: retq
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
; movabsq (constant pool load of mask for sign bits)
; vmovq (move from integer register to vector/fp register)
; vandps (mask off sign bits)
; vmovq (move vector/fp register back to integer return register)
;
; We should generate:
; mov (put constant value in return register)

define i64 @fabs_v2f32_1() {
; X86-LABEL: fabs_v2f32_1:
; X86: # %bb.0:
; X86-NEXT: xorl %eax, %eax
; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF
; X86-NEXT: retl
;
; X64-LABEL: fabs_v2f32_1:
; X64: # %bb.0:
; X64-NEXT: movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT: retq
  %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
}

define i64 @fabs_v2f32_2() {
; X86-LABEL: fabs_v2f32_2:
; X86: # %bb.0:
; X86-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X86-NEXT: xorl %edx, %edx
; X86-NEXT: retl
;
; X64-LABEL: fabs_v2f32_2:
; X64: # %bb.0:
; X64-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT: retq
  %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
  %ret = bitcast <2 x float> %fabs to i64
  ret i64 %ret
}

declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)
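; For reference, the constants checked in fabs_v2f32_1/fabs_v2f32_2 above
; follow from clearing bit 31 of each 32-bit lane (lane 0 is the low half of
; the i64):
;   fabs_v2f32_1: 0xFFFFFFFF00000000 -> lanes {0x00000000, 0xFFFFFFFF}
;                 -> {0x00000000, 0x7FFFFFFF} -> 0x7FFFFFFF00000000 (9223372032559808512)
;   fabs_v2f32_2: 0x00000000FFFFFFFF -> lanes {0xFFFFFFFF, 0x00000000}
;                 -> {0x7FFFFFFF, 0x00000000} -> 0x000000007FFFFFFF (2147483647)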