; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X32 --check-prefix=X32_AVX512VLDQ
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefix=X64 --check-prefix=X64_AVX512VLDQ

; FIXME: Drop the regex pattern matching of 'nan' once we drop support for MSVC
; 2013.

define <2 x double> @fabs_v2f64(<2 x double> %p) {
; X32_AVX-LABEL: fabs_v2f64:
; X32_AVX:       # BB#0:
; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v2f64:
; X32_AVX512VL:       # BB#0:
; X32_AVX512VL-NEXT:    vpand {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v2f64:
; X32_AVX512VLDQ:       # BB#0:
; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v2f64:
; X64_AVX:       # BB#0:
; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v2f64:
; X64_AVX512VL:       # BB#0:
; X64_AVX512VL-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v2f64:
; X64_AVX512VLDQ:       # BB#0:
; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <2 x double> @llvm.fabs.v2f64(<2 x double> %p)
  ret <2 x double> %t
}
declare <2 x double> @llvm.fabs.v2f64(<2 x double> %p)

define <4 x float> @fabs_v4f32(<4 x float> %p) {
; X32_AVX-LABEL: fabs_v4f32:
; X32_AVX:       # BB#0:
; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %xmm0, %xmm0
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v4f32:
; X32_AVX512VL:       # BB#0:
; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f32:
; X32_AVX512VLDQ:       # BB#0:
; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to4}, %xmm0, %xmm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v4f32:
; X64_AVX:       # BB#0:
; X64_AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v4f32:
; X64_AVX512VL:       # BB#0:
; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f32:
; X64_AVX512VLDQ:       # BB#0:
; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to4}, %xmm0, %xmm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <4 x float> @llvm.fabs.v4f32(<4 x float> %p)
  ret <4 x float> %t
}
declare <4 x float> @llvm.fabs.v4f32(<4 x float> %p)

define <4 x double> @fabs_v4f64(<4 x double> %p) {
; X32_AVX-LABEL: fabs_v4f64:
; X32_AVX:       # BB#0:
; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v4f64:
; X32_AVX512VL:       # BB#0:
; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v4f64:
; X32_AVX512VLDQ:       # BB#0:
; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to4}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v4f64:
; X64_AVX:       # BB#0:
; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v4f64:
; X64_AVX512VL:       # BB#0:
; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v4f64:
; X64_AVX512VLDQ:       # BB#0:
; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <4 x double> @llvm.fabs.v4f64(<4 x double> %p)
  ret <4 x double> %t
}
declare <4 x double> @llvm.fabs.v4f64(<4 x double> %p)

define <8 x float> @fabs_v8f32(<8 x float> %p) {
; X32_AVX-LABEL: fabs_v8f32:
; X32_AVX:       # BB#0:
; X32_AVX-NEXT:    vandps {{\.LCPI.*}}, %ymm0, %ymm0
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v8f32:
; X32_AVX512VL:       # BB#0:
; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f32:
; X32_AVX512VLDQ:       # BB#0:
; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v8f32:
; X64_AVX:       # BB#0:
; X64_AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v8f32:
; X64_AVX512VL:       # BB#0:
; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f32:
; X64_AVX512VLDQ:       # BB#0:
; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to8}, %ymm0, %ymm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <8 x float> @llvm.fabs.v8f32(<8 x float> %p)
  ret <8 x float> %t
}
declare <8 x float> @llvm.fabs.v8f32(<8 x float> %p)

define <8 x double> @fabs_v8f64(<8 x double> %p) {
; X32_AVX-LABEL: fabs_v8f64:
; X32_AVX:       # BB#0:
; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v8f64:
; X32_AVX512VL:       # BB#0:
; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v8f64:
; X32_AVX512VLDQ:       # BB#0:
; X32_AVX512VLDQ-NEXT:    vandpd {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v8f64:
; X64_AVX:       # BB#0:
; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v8f64:
; X64_AVX512VL:       # BB#0:
; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v8f64:
; X64_AVX512VLDQ:       # BB#0:
; X64_AVX512VLDQ-NEXT:    vandpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
  ret <8 x double> %t
}
declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)

define <16 x float> @fabs_v16f32(<16 x float> %p) {
; X32_AVX-LABEL: fabs_v16f32:
; X32_AVX:       # BB#0:
; X32_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X32_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X32_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X32_AVX-NEXT:    retl
;
; X32_AVX512VL-LABEL: fabs_v16f32:
; X32_AVX512VL:       # BB#0:
; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VL-NEXT:    retl
;
; X32_AVX512VLDQ-LABEL: fabs_v16f32:
; X32_AVX512VLDQ:       # BB#0:
; X32_AVX512VLDQ-NEXT:    vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
; X32_AVX512VLDQ-NEXT:    retl
;
; X64_AVX-LABEL: fabs_v16f32:
; X64_AVX:       # BB#0:
; X64_AVX-NEXT:    vmovaps {{.*#+}} ymm2 = [{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}},{{(nan|1\.#QNAN0e\+00)}}]
; X64_AVX-NEXT:    vandps %ymm2, %ymm0, %ymm0
; X64_AVX-NEXT:    vandps %ymm2, %ymm1, %ymm1
; X64_AVX-NEXT:    retq
;
; X64_AVX512VL-LABEL: fabs_v16f32:
; X64_AVX512VL:       # BB#0:
; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VL-NEXT:    retq
;
; X64_AVX512VLDQ-LABEL: fabs_v16f32:
; X64_AVX512VLDQ:       # BB#0:
; X64_AVX512VLDQ-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; X64_AVX512VLDQ-NEXT:    retq
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p)

; PR20354: when generating code for a vector fabs op,
; make sure that we're only turning off the sign bit of each float value.
; No constant pool loads or vector ops are needed for the fabs of a
; bitcasted integer constant; we should just return an integer constant
; that has the sign bits turned off.
;
; So instead of something like this:
;    movabsq (constant pool load of mask for sign bits)
;    vmovq   (move from integer register to vector/fp register)
;    vandps  (mask off sign bits)
;    vmovq   (move vector/fp register back to integer return register)
;
; We should generate:
;    mov     (put constant value in return register)

define i64 @fabs_v2f32_1() {
; X32-LABEL: fabs_v2f32_1:
; X32:       # BB#0:
; X32-NEXT:    xorl %eax, %eax
; X32-NEXT:    movl $2147483647, %edx # imm = 0x7FFFFFFF
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_1:
; X64:       # BB#0:
; X64-NEXT:    movabsq $9223372032559808512, %rax # imm = 0x7FFFFFFF00000000
; X64-NEXT:    retq
 %bitcast = bitcast i64 18446744069414584320 to <2 x float> ; 0xFFFF_FFFF_0000_0000
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

define i64 @fabs_v2f32_2() {
; X32-LABEL: fabs_v2f32_2:
; X32:       # BB#0:
; X32-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X32-NEXT:    xorl %edx, %edx
; X32-NEXT:    retl
;
; X64-LABEL: fabs_v2f32_2:
; X64:       # BB#0:
; X64-NEXT:    movl $2147483647, %eax # imm = 0x7FFFFFFF
; X64-NEXT:    retq
 %bitcast = bitcast i64 4294967295 to <2 x float> ; 0x0000_0000_FFFF_FFFF
 %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %bitcast)
 %ret = bitcast <2 x float> %fabs to i64
 ret i64 %ret
}

declare <2 x float> @llvm.fabs.v2f32(<2 x float> %p)