; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=X64_AVX1
; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefixes=X32_AVX1

declare i32 @llvm.fptoui.sat.i32.f32(float)
declare i64 @llvm.fptosi.sat.i64.f64(double)

define float @trunc_unsigned_f32(float %x) #0 {
; SSE2-LABEL: trunc_unsigned_f32:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttss2si %xmm0, %rax
; SSE2-NEXT: movl %eax, %eax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2ss %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_unsigned_f32:
; SSE41: # %bb.0:
; SSE41-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_unsigned_f32:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_unsigned_f32:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: retl
  %i = fptoui float %x to i32
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_unsigned_f64(double %x) #0 {
; SSE2-LABEL: trunc_unsigned_f64:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: sarq $63, %rcx
; SSE2-NEXT: subsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addsd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_unsigned_f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_unsigned_f64:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_unsigned_f64:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = fptoui double %x to i64
  %r = uitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm1
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: pand %xmm2, %xmm0
; SSE2-NEXT: por %xmm1, %xmm0
; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [65535,65535,65535,65535]
; SSE2-NEXT: pand %xmm0, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: subps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_unsigned_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: roundps $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_unsigned_v4f32:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_unsigned_v4f32:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptoui <4 x float> %x to <4 x i32>
  %r = uitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}

define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: subsd %xmm2, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rax, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: movq %rdx, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: subsd %xmm2, %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: subpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_unsigned_v2f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_unsigned_v2f64:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_unsigned_v2f64:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptoui <2 x double> %x to <2 x i64>
  %r = uitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2: # %bb.0:
; SSE2-NEXT: movapd %xmm1, %xmm2
; SSE2-NEXT: movsd {{.*#+}} xmm3 = mem[0],zero
; SSE2-NEXT: subsd %xmm3, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: cvttsd2si %xmm2, %rcx
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rax, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: movq %rdx, %xmm1
; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1,1]
; SSE2-NEXT: cvttsd2si %xmm2, %rax
; SSE2-NEXT: subsd %xmm3, %xmm2
; SSE2-NEXT: cvttsd2si %xmm2, %rcx
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm2
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: movapd %xmm0, %xmm2
; SSE2-NEXT: subsd %xmm3, %xmm2
; SSE2-NEXT: cvttsd2si %xmm2, %rax
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rcx, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rax, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: movq %rdx, %xmm2
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: subsd %xmm3, %xmm0
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: sarq $63, %rdx
; SSE2-NEXT: andq %rcx, %rdx
; SSE2-NEXT: orq %rax, %rdx
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [4294967295,4294967295]
; SSE2-NEXT: movdqa %xmm2, %xmm3
; SSE2-NEXT: pand %xmm0, %xmm3
; SSE2-NEXT: movdqa {{.*#+}} xmm4 = [4841369599423283200,4841369599423283200]
; SSE2-NEXT: por %xmm4, %xmm3
; SSE2-NEXT: psrlq $32, %xmm2
; SSE2-NEXT: movdqa {{.*#+}} xmm5 = [4985484787499139072,4985484787499139072]
; SSE2-NEXT: por %xmm5, %xmm2
; SSE2-NEXT: movapd {{.*#+}} xmm6 = [1.9342813118337666E+25,1.9342813118337666E+25]
; SSE2-NEXT: subpd %xmm6, %xmm2
; SSE2-NEXT: addpd %xmm3, %xmm2
; SSE2-NEXT: pand %xmm1, %xmm0
; SSE2-NEXT: por %xmm4, %xmm0
; SSE2-NEXT: psrlq $32, %xmm1
; SSE2-NEXT: por %xmm5, %xmm1
; SSE2-NEXT: subpd %xmm6, %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_unsigned_v4f64:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: roundpd $11, %xmm1, %xmm1
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_unsigned_v4f64:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_unsigned_v4f64:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X32_AVX1-NEXT: retl
  %i = fptoui <4 x double> %x to <4 x i64>
  %r = uitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

define float @trunc_signed_f32_no_fast_math(float %x) {
; SSE-LABEL: trunc_signed_f32_no_fast_math:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_f32_no_fast_math:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 4
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

; Without -0.0, it is ok to use roundss if it is available.
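; For example, with x = -0.5 the fptosi/sitofp round trip yields +0.0,
; while a truncating round (roundss/roundsd with immediate 11, i.e. round
; toward zero) yields -0.0. The single-instruction form is therefore only
; used by the _nsz tests below, which carry the
; "no-signed-zeros-fp-math"="true" attribute (#0).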

define float @trunc_signed_f32_nsz(float %x) #0 {
; SSE2-LABEL: trunc_signed_f32_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_f32_nsz:
; SSE41: # %bb.0:
; SSE41-NEXT: roundss $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_f32_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_f32_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed32_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed32_f64_no_fast_math:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed32_f64_no_fast_math:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: .cfi_offset %ebp, -8
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT: vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_signed32_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed32_f64_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE2-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed32_f64_nsz:
; SSE41: # %bb.0:
; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed32_f64_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed32_f64_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_no_fast_math(float %x) {
; SSE-LABEL: trunc_f32_signed32_f64_no_fast_math:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_f32_signed32_f64_no_fast_math:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: .cfi_offset %ebp, -8
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT: vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define double @trunc_f32_signed32_f64_nsz(float %x) #0 {
; SSE-LABEL: trunc_f32_signed32_f64_nsz:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_f32_signed32_f64_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttps2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0
; X32_AVX1-NEXT: vmovlps %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = fptosi float %x to i32
  %r = sitofp i32 %i to double
  ret double %r
}

define float @trunc_f64_signed32_f32_no_fast_math(double %x) {
; SSE-LABEL: trunc_f64_signed32_f32_no_fast_math:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_f64_signed32_f32_no_fast_math:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: .cfi_def_cfa_offset 4
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define float @trunc_f64_signed32_f32_nsz(double %x) #0 {
; SSE-LABEL: trunc_f64_signed32_f32_nsz:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
; SSE-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X64_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_f64_signed32_f32_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i32
  %r = sitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_no_fast_math(double %x) {
; SSE-LABEL: trunc_signed_f64_no_fast_math:
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttsd2si %xmm0, %rax
; X64_AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_f64_no_fast_math:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_offset 8
; X32_AVX1-NEXT: .cfi_offset %ebp, -8
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: .cfi_def_cfa_register %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $24, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: fisttpll (%esp)
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vmovlps %xmm0, {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fildll {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fstpl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: fldl {{[0-9]+}}(%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: .cfi_def_cfa %esp, 4
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define double @trunc_signed_f64_nsz(double %x) #0 {
; SSE2-LABEL: trunc_signed_f64_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_f64_nsz:
; SSE41: # %bb.0:
; SSE41-NEXT: roundsd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_f64_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_f64_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: andl $-8, %esp
; X32_AVX1-NEXT: subl $8, %esp
; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; X32_AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovsd %xmm0, (%esp)
; X32_AVX1-NEXT: fldl (%esp)
; X32_AVX1-NEXT: movl %ebp, %esp
; X32_AVX1-NEXT: popl %ebp
; X32_AVX1-NEXT: retl
  %i = fptosi double %x to i64
  %r = sitofp i64 %i to double
  ret double %r
}

define <4 x float> @trunc_signed_v4f32_nsz(<4 x float> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f32_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttps2dq %xmm0, %xmm0
; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_v4f32_nsz:
; SSE41: # %bb.0:
; SSE41-NEXT: roundps $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_v4f32_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_v4f32_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: vroundps $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptosi <4 x float> %x to <4 x i32>
  %r = sitofp <4 x i32> %i to <4 x float>
  ret <4 x float> %r
}
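
; SSE2 has no packed double -> packed 64-bit integer conversion and no round
; instruction, so the <2 x i64> and <4 x i64> round trips below are
; scalarized through cvttsd2si/cvtsi2sd; with SSE4.1 or AVX the whole
; sequence folds to a single roundpd/vroundpd.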

define <2 x double> @trunc_signed_v2f64_nsz(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v2f64_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttsd2si %xmm0, %rax
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rcx
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rax, %xmm0
; SSE2-NEXT: cvtsi2sd %rcx, %xmm1
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_v2f64_nsz:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_v2f64_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_v2f64_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: vroundpd $11, %xmm0, %xmm0
; X32_AVX1-NEXT: retl
  %i = fptosi <2 x double> %x to <2 x i64>
  %r = sitofp <2 x i64> %i to <2 x double>
  ret <2 x double> %r
}

define <4 x double> @trunc_signed_v4f64_nsz(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_signed_v4f64_nsz:
; SSE2: # %bb.0:
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; SSE2-NEXT: cvttsd2si %xmm1, %rcx
; SSE2-NEXT: cvttsd2si %xmm0, %rdx
; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1]
; SSE2-NEXT: cvttsd2si %xmm0, %rsi
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: cvtsi2sd %rdx, %xmm0
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rsi, %xmm1
; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: xorps %xmm1, %xmm1
; SSE2-NEXT: cvtsi2sd %rax, %xmm1
; SSE2-NEXT: cvtsi2sd %rcx, %xmm2
; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT: retq
;
; SSE41-LABEL: trunc_signed_v4f64_nsz:
; SSE41: # %bb.0:
; SSE41-NEXT: roundpd $11, %xmm0, %xmm0
; SSE41-NEXT: roundpd $11, %xmm1, %xmm1
; SSE41-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_v4f64_nsz:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_v4f64_nsz:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: vroundpd $11, %ymm0, %ymm0
; X32_AVX1-NEXT: retl
  %i = fptosi <4 x double> %x to <4 x i64>
  %r = sitofp <4 x i64> %i to <4 x double>
  ret <4 x double> %r
}

; The FTRUNC ("round**" x86 asm) fold relies on UB in the case of overflow.
; This used to be guarded with an attribute check. That allowed existing
; code to keep working based on the assumption that float->int overflow
; had saturating behavior.
;
; Now, we expect a front-end to use IR intrinsics if it wants to avoid this
; transform.
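;
; For example, the tests below use the saturating conversion intrinsics
; declared at the top of the file:
;   %i = call i32 @llvm.fptoui.sat.i32.f32(float %x)
; which clamps out-of-range inputs (and maps NaN to 0) instead of producing
; poison, so the conversion round trip can no longer be treated as a plain
; truncation.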

define float @trunc_unsigned_f32_disable_via_intrinsic(float %x) #0 {
; SSE-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; SSE: # %bb.0:
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: xorl %ecx, %ecx
; SSE-NEXT: xorps %xmm1, %xmm1
; SSE-NEXT: ucomiss %xmm1, %xmm0
; SSE-NEXT: cmovael %eax, %ecx
; SSE-NEXT: ucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movl $-1, %eax
; SSE-NEXT: cmovbel %ecx, %eax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2ss %rax, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttss2si %xmm0, %rax
; X64_AVX1-NEXT: xorl %ecx, %ecx
; X64_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X64_AVX1-NEXT: vucomiss %xmm1, %xmm0
; X64_AVX1-NEXT: cmovael %eax, %ecx
; X64_AVX1-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64_AVX1-NEXT: movl $-1, %eax
; X64_AVX1-NEXT: cmovbel %ecx, %eax
; X64_AVX1-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_unsigned_f32_disable_via_intrinsic:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %eax
; X32_AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32_AVX1-NEXT: vcvttss2si %xmm0, %eax
; X32_AVX1-NEXT: movl %eax, %ecx
; X32_AVX1-NEXT: sarl $31, %ecx
; X32_AVX1-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
; X32_AVX1-NEXT: vcvttss2si %xmm1, %edx
; X32_AVX1-NEXT: andl %ecx, %edx
; X32_AVX1-NEXT: orl %eax, %edx
; X32_AVX1-NEXT: xorl %eax, %eax
; X32_AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
; X32_AVX1-NEXT: vucomiss %xmm1, %xmm0
; X32_AVX1-NEXT: cmovael %edx, %eax
; X32_AVX1-NEXT: vucomiss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X32_AVX1-NEXT: movl $-1, %ecx
; X32_AVX1-NEXT: cmovbel %eax, %ecx
; X32_AVX1-NEXT: vmovd %ecx, %xmm0
; X32_AVX1-NEXT: vpor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32_AVX1-NEXT: vsubsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
; X32_AVX1-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; X32_AVX1-NEXT: vmovss %xmm0, (%esp)
; X32_AVX1-NEXT: flds (%esp)
; X32_AVX1-NEXT: popl %eax
; X32_AVX1-NEXT: retl
  %i = call i32 @llvm.fptoui.sat.i32.f32(float %x)
  %r = uitofp i32 %i to float
  ret float %r
}

define double @trunc_signed_f64_disable_via_intrinsic(double %x) #0 {
; SSE-LABEL: trunc_signed_f64_disable_via_intrinsic:
; SSE: # %bb.0:
; SSE-NEXT: cvttsd2si %xmm0, %rax
; SSE-NEXT: ucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; SSE-NEXT: cmovbeq %rax, %rcx
; SSE-NEXT: xorl %eax, %eax
; SSE-NEXT: ucomisd %xmm0, %xmm0
; SSE-NEXT: cmovnpq %rcx, %rax
; SSE-NEXT: xorps %xmm0, %xmm0
; SSE-NEXT: cvtsi2sd %rax, %xmm0
; SSE-NEXT: retq
;
; X64_AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X64_AVX1: # %bb.0:
; X64_AVX1-NEXT: vcvttsd2si %xmm0, %rax
; X64_AVX1-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64_AVX1-NEXT: movabsq $9223372036854775807, %rcx # imm = 0x7FFFFFFFFFFFFFFF
; X64_AVX1-NEXT: cmovbeq %rax, %rcx
; X64_AVX1-NEXT: xorl %eax, %eax
; X64_AVX1-NEXT: vucomisd %xmm0, %xmm0
; X64_AVX1-NEXT: cmovnpq %rcx, %rax
; X64_AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0
; X64_AVX1-NEXT: retq
;
; X32_AVX1-LABEL: trunc_signed_f64_disable_via_intrinsic:
; X32_AVX1: # %bb.0:
; X32_AVX1-NEXT: pushl %ebp
; X32_AVX1-NEXT: movl %esp, %ebp
; X32_AVX1-NEXT: pushl %esi
X32_AVX1-NEXT: andl $-8, %esp 794; X32_AVX1-NEXT: subl $32, %esp 795; X32_AVX1-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero 796; X32_AVX1-NEXT: vmovsd %xmm0, (%esp) 797; X32_AVX1-NEXT: fldl (%esp) 798; X32_AVX1-NEXT: fisttpll (%esp) 799; X32_AVX1-NEXT: xorl %eax, %eax 800; X32_AVX1-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 801; X32_AVX1-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 802; X32_AVX1-NEXT: movl $0, %edx 803; X32_AVX1-NEXT: jb .LBB19_2 804; X32_AVX1-NEXT: # %bb.1: 805; X32_AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx 806; X32_AVX1-NEXT: movl (%esp), %edx 807; X32_AVX1-NEXT: .LBB19_2: 808; X32_AVX1-NEXT: vucomisd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 809; X32_AVX1-NEXT: movl $-1, %esi 810; X32_AVX1-NEXT: cmovbel %edx, %esi 811; X32_AVX1-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF 812; X32_AVX1-NEXT: cmovbel %ecx, %edx 813; X32_AVX1-NEXT: vucomisd %xmm0, %xmm0 814; X32_AVX1-NEXT: cmovpl %eax, %edx 815; X32_AVX1-NEXT: cmovpl %eax, %esi 816; X32_AVX1-NEXT: vmovd %esi, %xmm0 817; X32_AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 818; X32_AVX1-NEXT: vmovq %xmm0, {{[0-9]+}}(%esp) 819; X32_AVX1-NEXT: fildll {{[0-9]+}}(%esp) 820; X32_AVX1-NEXT: fstpl {{[0-9]+}}(%esp) 821; X32_AVX1-NEXT: fldl {{[0-9]+}}(%esp) 822; X32_AVX1-NEXT: leal -4(%ebp), %esp 823; X32_AVX1-NEXT: popl %esi 824; X32_AVX1-NEXT: popl %ebp 825; X32_AVX1-NEXT: retl 826 %i = call i64 @llvm.fptosi.sat.i64.f64(double %x) 827 %r = sitofp i64 %i to double 828 ret double %r 829} 830 831attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" } 832