; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck %s --check-prefixes=SI
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck %s --check-prefixes=VI
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck %s --check-prefixes=EG

; Codegen test for float -> signed integer conversion (fptosi).
; Each kernel below is compiled three times (amdgcn with the default CPU,
; amdgcn with -mcpu=tonga, and r600 with -mcpu=redwood) and the complete
; instruction sequence of every kernel is pinned by the generated check
; lines. Covered result types: i32, <2 x i32>, <4 x i32>, i64, <2 x i64>,
; <4 x i64>, i1 and i16; two kernels additionally feed the conversion from
; llvm.fabs.f32.
; The check lines are generated output; regenerate them with the script
; named on the first line instead of editing them by hand.

; fabs intrinsic used by the *_fabs kernels.
declare float @llvm.fabs.f32(float) #1

; f32 -> i32 on a kernel argument, result stored to %out.
; The amdgcn checks expect a single v_cvt_i32_f32; the r600 checks expect
; TRUNC followed by FLT_TO_INT.
define amdgpu_kernel void @fp_to_sint_i32(i32 addrspace(1)* %out, float %in) {
; SI-LABEL: fp_to_sint_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC * T0.W, KC0[2].Z,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi float %in to i32
  store i32 %conv, i32 addrspace(1)* %out
  ret void
}

; Same conversion applied to llvm.fabs.f32(%in).
; The checks expect the absolute value to show up as a |src| operand on the
; conversion (amdgcn, e64 encoding) or on TRUNC (r600), not as a separate
; instruction.
define amdgpu_kernel void @fp_to_sint_i32_fabs(i32 addrspace(1)* %out, float %in) {
; SI-LABEL: fp_to_sint_i32_fabs:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e64 v0, |s4|
; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_i32_fabs:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e64 v0, |s2|
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_i32_fabs:
; EG: ; %bb.0:
; EG-NEXT: ALU 3, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC * T0.W, |KC0[2].Z|,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %in.fabs = call float @llvm.fabs.f32(float %in)
  %conv = fptosi float %in.fabs to i32
  store i32 %conv, i32 addrspace(1)* %out
  ret void
}

; <2 x float> kernel argument -> <2 x i32>.
; The checks expect one conversion per element and a single two-dword store.
define amdgpu_kernel void @fp_to_sint_v2i32(<2 x i32> addrspace(1)* %out, <2 x float> %in) {
; SI-LABEL: fp_to_sint_v2i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e32 v1, s5
; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v2i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v1, s3
; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v2i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 5, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC * T0.W, KC0[3].X,
; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
; EG-NEXT: TRUNC * T0.W, KC0[2].W,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %result = fptosi <2 x float> %in to <2 x i32>
  store <2 x i32> %result, <2 x i32> addrspace(1)* %out
  ret void
}

; <4 x float> -> <4 x i32>. This is the only kernel in the file whose input
; is loaded through a pointer (%in) instead of being passed by value, so the
; expected output contains an extra load (s_load_dwordx4 / VTX_READ_128)
; before the four conversions and the single four-dword store.
define amdgpu_kernel void @fp_to_sint_v4i32(<4 x i32> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
; SI-LABEL: fp_to_sint_v4i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e32 v3, s7
; SI-NEXT: v_cvt_i32_f32_e32 v2, s6
; SI-NEXT: v_cvt_i32_f32_e32 v1, s5
; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v4i32:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v3, s7
; VI-NEXT: v_cvt_i32_f32_e32 v2, s6
; VI-NEXT: v_cvt_i32_f32_e32 v1, s5
; VI-NEXT: v_cvt_i32_f32_e32 v0, s4
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v4i32:
; EG: ; %bb.0:
; EG-NEXT: ALU 0, @8, KC0[CB0:0-32], KC1[]
; EG-NEXT: TEX 0 @6
; EG-NEXT: ALU 9, @9, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: Fetch clause starting at 6:
; EG-NEXT: VTX_READ_128 T0.XYZW, T0.X, 0, #1
; EG-NEXT: ALU clause starting at 8:
; EG-NEXT: MOV * T0.X, KC0[2].Z,
; EG-NEXT: ALU clause starting at 9:
; EG-NEXT: TRUNC T0.W, T0.W,
; EG-NEXT: TRUNC * T1.W, T0.Z,
; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
; EG-NEXT: FLT_TO_INT T0.Z, T1.W,
; EG-NEXT: TRUNC * T1.W, T0.Y,
; EG-NEXT: FLT_TO_INT T0.Y, PV.W,
; EG-NEXT: TRUNC * T1.W, T0.X,
; EG-NEXT: FLT_TO_INT T0.X, PV.W,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %value = load <4 x float>, <4 x float> addrspace(1) * %in
  %result = fptosi <4 x float> %value to <4 x i32>
  store <4 x i32> %result, <4 x i32> addrspace(1)* %out
  ret void
}

; Check that the compiler doesn't crash with a "cannot select" error
; f32 -> i64 is expected to be expanded rather than selected directly.
; The amdgcn checks pin this sequence: v_trunc, multiply |x| by 0x2f800000
; (the f32 encoding of 2^-32), v_floor, v_fma with 0xcf800000 (the f32
; encoding of -2^32) to separate the high and low words, two v_cvt_u32_f32,
; then xor/sub with the sign mask produced by v_ashrrev 31.
; The r600 checks pin an integer bit-manipulation sequence instead.
define amdgpu_kernel void @fp_to_sint_i64 (i64 addrspace(1)* %out, float %in) {
; SI-LABEL: fp_to_sint_i64:
; SI: ; %bb.0: ; %entry
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_load_dword s0, s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s1, 0x2f800000
; SI-NEXT: s_mov_b32 s2, 0xcf800000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_trunc_f32_e32 v0, s0
; SI-NEXT: v_mul_f32_e64 v1, |v0|, s1
; SI-NEXT: v_ashrrev_i32_e32 v2, 31, v0
; SI-NEXT: v_floor_f32_e32 v1, v1
; SI-NEXT: v_cvt_u32_f32_e32 v3, v1
; SI-NEXT: v_fma_f32 v0, v1, s2, |v0|
; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
; SI-NEXT: v_xor_b32_e32 v1, v3, v2
; SI-NEXT: v_xor_b32_e32 v0, v0, v2
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v2
; SI-NEXT: v_subb_u32_e32 v1, vcc, v1, v2, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_i64:
; VI: ; %bb.0: ; %entry
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s4, 0x2f800000
; VI-NEXT: s_mov_b32 s5, 0xcf800000
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trunc_f32_e32 v0, s2
; VI-NEXT: v_mul_f32_e64 v1, |v0|, s4
; VI-NEXT: v_floor_f32_e32 v1, v1
; VI-NEXT: v_fma_f32 v2, v1, s5, |v0|
; VI-NEXT: v_cvt_u32_f32_e32 v2, v2
; VI-NEXT: v_cvt_u32_f32_e32 v1, v1
; VI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_xor_b32_e32 v0, v2, v3
; VI-NEXT: v_xor_b32_e32 v1, v1, v3
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v3
; VI-NEXT: v_subb_u32_e32 v1, vcc, v1, v3, vcc
; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_i64:
; EG: ; %bb.0: ; %entry
; EG-NEXT: ALU 40, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T0.W, KC0[2].Z, literal.x, PV.W,
; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T1.W, PS, literal.x,
; EG-NEXT: ADD_INT * T2.W, PV.W, literal.y,
; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T0.X, T0.W, literal.x,
; EG-NEXT: SUB_INT T0.Y, literal.y, T0.W,
; EG-NEXT: AND_INT T0.Z, PS, literal.z,
; EG-NEXT: NOT_INT T0.W, PS,
; EG-NEXT: LSHR * T3.W, PV.W, 1,
; EG-NEXT: -127(nan), 150(2.101948e-43)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T1.X, 0.0, PS, PV.W,
; EG-NEXT: LSHL T1.Y, T1.W, PV.Z,
; EG-NEXT: AND_INT T0.Z, T2.W, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
; EG-NEXT: AND_INT * T1.W, PV.Y, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T1.Z, PV.Z, PV.Y, 0.0,
; EG-NEXT: CNDE_INT T0.W, PV.Z, PV.X, PV.Y,
; EG-NEXT: SETGT_INT * T1.W, T0.X, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T0.Z, PS, 0.0, PV.W,
; EG-NEXT: CNDE_INT T0.W, PS, PV.Y, PV.Z,
; EG-NEXT: ASHR * T1.W, KC0[2].Z, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.W, PV.W, PS,
; EG-NEXT: XOR_INT * T2.W, PV.Z, PS,
; EG-NEXT: SUB_INT T2.W, PS, T1.W,
; EG-NEXT: SUBB_UINT * T3.W, PV.W, T1.W,
; EG-NEXT: SUB_INT T2.W, PV.W, PS,
; EG-NEXT: SETGT_INT * T3.W, 0.0, T0.X,
; EG-NEXT: CNDE_INT T0.Y, PS, PV.W, 0.0,
; EG-NEXT: SUB_INT * T0.W, T0.W, T1.W,
; EG-NEXT: CNDE_INT T0.X, T3.W, PV.W, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
  %0 = fptosi float %in to i64
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

; <2 x float> kernel argument -> <2 x i64>.
; The checks expect the i64 expansion once per element (interleaved by the
; scheduler) and a single four-dword store of both results.
define amdgpu_kernel void @fp_to_sint_v2i64(<2 x i64> addrspace(1)* %out, <2 x float> %x) {
; SI-LABEL: fp_to_sint_v2i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0xb
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s2, 0x2f800000
; SI-NEXT: s_mov_b32 s3, 0xcf800000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_trunc_f32_e32 v0, s1
; SI-NEXT: v_trunc_f32_e32 v1, s0
; SI-NEXT: v_mul_f32_e64 v2, |v0|, s2
; SI-NEXT: v_ashrrev_i32_e32 v3, 31, v0
; SI-NEXT: v_mul_f32_e64 v4, |v1|, s2
; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v1
; SI-NEXT: v_floor_f32_e32 v2, v2
; SI-NEXT: v_floor_f32_e32 v4, v4
; SI-NEXT: v_cvt_u32_f32_e32 v6, v2
; SI-NEXT: v_fma_f32 v0, v2, s3, |v0|
; SI-NEXT: v_cvt_u32_f32_e32 v2, v4
; SI-NEXT: v_fma_f32 v1, v4, s3, |v1|
; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
; SI-NEXT: v_xor_b32_e32 v4, v6, v3
; SI-NEXT: v_cvt_u32_f32_e32 v1, v1
; SI-NEXT: v_xor_b32_e32 v6, v2, v5
; SI-NEXT: v_xor_b32_e32 v0, v0, v3
; SI-NEXT: v_xor_b32_e32 v1, v1, v5
; SI-NEXT: v_sub_i32_e32 v2, vcc, v0, v3
; SI-NEXT: v_subb_u32_e32 v3, vcc, v4, v3, vcc
; SI-NEXT: v_sub_i32_e32 v0, vcc, v1, v5
; SI-NEXT: v_subb_u32_e32 v1, vcc, v6, v5, vcc
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v2i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s6, 0x2f800000
; VI-NEXT: s_mov_b32 s7, 0xcf800000
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trunc_f32_e32 v0, s5
; VI-NEXT: v_mul_f32_e64 v1, |v0|, s6
; VI-NEXT: v_floor_f32_e32 v1, v1
; VI-NEXT: v_fma_f32 v2, v1, s7, |v0|
; VI-NEXT: v_trunc_f32_e32 v4, s4
; VI-NEXT: v_cvt_u32_f32_e32 v2, v2
; VI-NEXT: v_mul_f32_e64 v3, |v4|, s6
; VI-NEXT: v_cvt_u32_f32_e32 v1, v1
; VI-NEXT: v_floor_f32_e32 v3, v3
; VI-NEXT: v_cvt_u32_f32_e32 v5, v3
; VI-NEXT: v_fma_f32 v3, v3, s7, |v4|
; VI-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; VI-NEXT: v_cvt_u32_f32_e32 v6, v3
; VI-NEXT: v_xor_b32_e32 v2, v2, v0
; VI-NEXT: v_xor_b32_e32 v1, v1, v0
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v0
; VI-NEXT: v_subb_u32_e32 v3, vcc, v1, v0, vcc
; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v4
; VI-NEXT: v_xor_b32_e32 v0, v6, v1
; VI-NEXT: v_xor_b32_e32 v4, v5, v1
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v2i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 75, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T1.XYZW, T0.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT * T1.W, KC0[2].W, literal.x, PV.W,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
; EG-NEXT: BFE_UINT T0.W, KC0[3].X, literal.y, T0.W,
; EG-NEXT: ADD_INT * T2.W, PV.W, literal.z,
; EG-NEXT: 8388607(1.175494e-38), 23(3.222986e-44)
; EG-NEXT: -150(nan), 0(0.000000e+00)
; EG-NEXT: SUB_INT T0.X, literal.x, PV.W,
; EG-NEXT: SUB_INT T0.Y, literal.x, T1.W,
; EG-NEXT: AND_INT T1.Z, PS, literal.y,
; EG-NEXT: OR_INT T3.W, PV.Z, literal.z,
; EG-NEXT: AND_INT * T4.W, KC0[3].X, literal.w,
; EG-NEXT: 150(2.101948e-43), 31(4.344025e-44)
; EG-NEXT: 8388608(1.175494e-38), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T1.X, PS, literal.x,
; EG-NEXT: LSHL T1.Y, PV.W, PV.Z,
; EG-NEXT: AND_INT T0.Z, T2.W, literal.y,
; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.W, PV.Y,
; EG-NEXT: AND_INT * T5.W, PV.Y, literal.y,
; EG-NEXT: 8388608(1.175494e-38), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T2.X, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T0.Y, PV.Z, PV.Y, 0.0,
; EG-NEXT: ADD_INT T1.Z, T0.W, literal.x,
; EG-NEXT: BIT_ALIGN_INT T4.W, 0.0, PV.X, T0.X,
; EG-NEXT: AND_INT * T5.W, T0.X, literal.y,
; EG-NEXT: -150(nan), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T0.X, PS, PV.W, 0.0,
; EG-NEXT: NOT_INT T2.Y, T2.W,
; EG-NEXT: AND_INT T2.Z, PV.Z, literal.x,
; EG-NEXT: NOT_INT T2.W, PV.Z,
; EG-NEXT: LSHR * T4.W, T1.X, 1,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: LSHR T3.X, T3.W, 1,
; EG-NEXT: ADD_INT T3.Y, T0.W, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
; EG-NEXT: LSHL T0.W, T1.X, PV.Z,
; EG-NEXT: AND_INT * T2.W, T1.Z, literal.y,
; EG-NEXT: -127(nan), 32(4.484155e-44)
; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T4.Y, PS, PV.Z, PV.W,
; EG-NEXT: SETGT_INT T1.Z, PV.Y, literal.x,
; EG-NEXT: BIT_ALIGN_INT T0.W, 0.0, PV.X, T2.Y,
; EG-NEXT: ADD_INT * T1.W, T1.W, literal.y,
; EG-NEXT: 23(3.222986e-44), -127(nan)
; EG-NEXT: CNDE_INT T3.X, T0.Z, PV.W, T1.Y,
; EG-NEXT: SETGT_INT T1.Y, PS, literal.x,
; EG-NEXT: CNDE_INT T0.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT: CNDE_INT T0.W, PV.Z, T0.X, PV.X,
; EG-NEXT: ASHR * T2.W, KC0[3].X, literal.y,
; EG-NEXT: 23(3.222986e-44), 31(4.344025e-44)
; EG-NEXT: XOR_INT T0.X, PV.W, PS,
; EG-NEXT: XOR_INT T2.Y, PV.Z, PS,
; EG-NEXT: CNDE_INT T0.Z, PV.Y, 0.0, PV.X,
; EG-NEXT: CNDE_INT T0.W, PV.Y, T2.X, T0.Y,
; EG-NEXT: ASHR * T3.W, KC0[2].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T0.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
; EG-NEXT: SUB_INT T0.W, PV.Y, T2.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.X, T2.W,
; EG-NEXT: SUB_INT T1.Y, PV.W, PS,
; EG-NEXT: SETGT_INT T1.Z, 0.0, T3.Y,
; EG-NEXT: SUB_INT T0.W, PV.Z, T3.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.Y, T3.W,
; EG-NEXT: SUB_INT T0.Z, PV.W, PS,
; EG-NEXT: SETGT_INT T0.W, 0.0, T1.W,
; EG-NEXT: CNDE_INT * T1.W, PV.Z, PV.Y, 0.0,
; EG-NEXT: CNDE_INT T1.Y, PV.W, PV.Z, 0.0,
; EG-NEXT: SUB_INT * T2.W, T0.X, T2.W,
; EG-NEXT: CNDE_INT T1.Z, T1.Z, PV.W, 0.0,
; EG-NEXT: SUB_INT * T2.W, T0.Y, T3.W,
; EG-NEXT: CNDE_INT T1.X, T0.W, PV.W, 0.0,
; EG-NEXT: LSHR * T0.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi <2 x float> %x to <2 x i64>
  store <2 x i64> %conv, <2 x i64> addrspace(1)* %out
  ret void
}

; <4 x float> kernel argument -> <4 x i64>.
; The 32-byte result is expected to be written with two four-dword stores
; (offset 16 and offset 0 on amdgcn); on r600 the expected code is split
; into two ALU clauses.
define amdgpu_kernel void @fp_to_sint_v4i64(<4 x i64> addrspace(1)* %out, <4 x float> %x) {
; SI-LABEL: fp_to_sint_v4i64:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_mov_b32 s8, 0x2f800000
; SI-NEXT: s_mov_b32 s9, 0xcf800000
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_trunc_f32_e32 v0, s1
; SI-NEXT: v_trunc_f32_e32 v1, s0
; SI-NEXT: v_trunc_f32_e32 v2, s3
; SI-NEXT: v_trunc_f32_e32 v3, s2
; SI-NEXT: v_mul_f32_e64 v4, |v0|, s8
; SI-NEXT: v_ashrrev_i32_e32 v5, 31, v0
; SI-NEXT: v_mul_f32_e64 v6, |v1|, s8
; SI-NEXT: v_ashrrev_i32_e32 v7, 31, v1
; SI-NEXT: v_mul_f32_e64 v8, |v2|, s8
; SI-NEXT: v_ashrrev_i32_e32 v9, 31, v2
; SI-NEXT: v_mul_f32_e64 v10, |v3|, s8
; SI-NEXT: v_ashrrev_i32_e32 v11, 31, v3
; SI-NEXT: v_floor_f32_e32 v4, v4
; SI-NEXT: v_floor_f32_e32 v6, v6
; SI-NEXT: v_floor_f32_e32 v8, v8
; SI-NEXT: v_floor_f32_e32 v10, v10
; SI-NEXT: v_cvt_u32_f32_e32 v12, v4
; SI-NEXT: v_fma_f32 v0, v4, s9, |v0|
; SI-NEXT: v_cvt_u32_f32_e32 v4, v6
; SI-NEXT: v_fma_f32 v1, v6, s9, |v1|
; SI-NEXT: v_cvt_u32_f32_e32 v6, v8
; SI-NEXT: v_fma_f32 v2, v8, s9, |v2|
; SI-NEXT: v_cvt_u32_f32_e32 v8, v10
; SI-NEXT: v_fma_f32 v3, v10, s9, |v3|
; SI-NEXT: v_cvt_u32_f32_e32 v0, v0
; SI-NEXT: v_xor_b32_e32 v10, v12, v5
; SI-NEXT: v_cvt_u32_f32_e32 v1, v1
; SI-NEXT: v_xor_b32_e32 v4, v4, v7
; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
; SI-NEXT: v_xor_b32_e32 v12, v6, v9
; SI-NEXT: v_cvt_u32_f32_e32 v3, v3
; SI-NEXT: v_xor_b32_e32 v8, v8, v11
; SI-NEXT: v_xor_b32_e32 v0, v0, v5
; SI-NEXT: v_xor_b32_e32 v1, v1, v7
; SI-NEXT: v_xor_b32_e32 v6, v2, v9
; SI-NEXT: v_xor_b32_e32 v13, v3, v11
; SI-NEXT: v_sub_i32_e32 v2, vcc, v0, v5
; SI-NEXT: v_subb_u32_e32 v3, vcc, v10, v5, vcc
; SI-NEXT: v_sub_i32_e32 v0, vcc, v1, v7
; SI-NEXT: v_subb_u32_e32 v1, vcc, v4, v7, vcc
; SI-NEXT: v_sub_i32_e32 v6, vcc, v6, v9
; SI-NEXT: v_subb_u32_e32 v7, vcc, v12, v9, vcc
; SI-NEXT: v_sub_i32_e32 v4, vcc, v13, v11
; SI-NEXT: v_subb_u32_e32 v5, vcc, v8, v11, vcc
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[4:7], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_v4i64:
; VI: ; %bb.0:
; VI-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x34
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s8, 0x2f800000
; VI-NEXT: s_mov_b32 s9, 0xcf800000
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_trunc_f32_e32 v0, s5
; VI-NEXT: v_mul_f32_e64 v1, |v0|, s8
; VI-NEXT: v_floor_f32_e32 v1, v1
; VI-NEXT: v_fma_f32 v2, v1, s9, |v0|
; VI-NEXT: v_cvt_u32_f32_e32 v2, v2
; VI-NEXT: v_trunc_f32_e32 v4, s4
; VI-NEXT: v_cvt_u32_f32_e32 v1, v1
; VI-NEXT: v_mul_f32_e64 v3, |v4|, s8
; VI-NEXT: v_floor_f32_e32 v3, v3
; VI-NEXT: v_ashrrev_i32_e32 v0, 31, v0
; VI-NEXT: v_cvt_u32_f32_e32 v5, v3
; VI-NEXT: v_fma_f32 v3, v3, s9, |v4|
; VI-NEXT: v_xor_b32_e32 v2, v2, v0
; VI-NEXT: v_cvt_u32_f32_e32 v6, v3
; VI-NEXT: v_xor_b32_e32 v1, v1, v0
; VI-NEXT: v_sub_u32_e32 v2, vcc, v2, v0
; VI-NEXT: v_subb_u32_e32 v3, vcc, v1, v0, vcc
; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v4
; VI-NEXT: v_xor_b32_e32 v4, v5, v1
; VI-NEXT: v_trunc_f32_e32 v5, s7
; VI-NEXT: v_xor_b32_e32 v0, v6, v1
; VI-NEXT: v_mul_f32_e64 v6, |v5|, s8
; VI-NEXT: v_floor_f32_e32 v6, v6
; VI-NEXT: v_cvt_u32_f32_e32 v7, v6
; VI-NEXT: v_fma_f32 v6, v6, s9, |v5|
; VI-NEXT: v_cvt_u32_f32_e32 v6, v6
; VI-NEXT: v_sub_u32_e32 v0, vcc, v0, v1
; VI-NEXT: v_subb_u32_e32 v1, vcc, v4, v1, vcc
; VI-NEXT: v_ashrrev_i32_e32 v4, 31, v5
; VI-NEXT: v_trunc_f32_e32 v8, s6
; VI-NEXT: v_xor_b32_e32 v5, v6, v4
; VI-NEXT: v_mul_f32_e64 v6, |v8|, s8
; VI-NEXT: v_floor_f32_e32 v6, v6
; VI-NEXT: v_cvt_u32_f32_e32 v9, v6
; VI-NEXT: v_fma_f32 v6, v6, s9, |v8|
; VI-NEXT: v_cvt_u32_f32_e32 v10, v6
; VI-NEXT: v_xor_b32_e32 v7, v7, v4
; VI-NEXT: v_sub_u32_e32 v6, vcc, v5, v4
; VI-NEXT: v_ashrrev_i32_e32 v5, 31, v8
; VI-NEXT: v_subb_u32_e32 v7, vcc, v7, v4, vcc
; VI-NEXT: v_xor_b32_e32 v4, v10, v5
; VI-NEXT: v_xor_b32_e32 v8, v9, v5
; VI-NEXT: v_sub_u32_e32 v4, vcc, v4, v5
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: v_subb_u32_e32 v5, vcc, v8, v5, vcc
; VI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_v4i64:
; EG: ; %bb.0:
; EG-NEXT: ALU 101, @6, KC0[CB0:0-32], KC1[]
; EG-NEXT: ALU 54, @108, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T4.XYZW, T0.X, 0
; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T6.XYZW, T2.X, 1
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 6:
; EG-NEXT: MOV * T0.W, literal.x,
; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T1.W, KC0[4].X, literal.x, PV.W,
; EG-NEXT: AND_INT * T2.W, KC0[4].X, literal.y,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT: OR_INT T0.Z, PS, literal.x,
; EG-NEXT: BFE_UINT T2.W, KC0[3].Z, literal.y, T0.W,
; EG-NEXT: ADD_INT * T3.W, PV.W, literal.z,
; EG-NEXT: 8388608(1.175494e-38), 23(3.222986e-44)
; EG-NEXT: -150(nan), 0(0.000000e+00)
; EG-NEXT: ADD_INT T0.Y, PV.W, literal.x,
; EG-NEXT: AND_INT T1.Z, PS, literal.y,
; EG-NEXT: NOT_INT T4.W, PS,
; EG-NEXT: LSHR * T5.W, PV.Z, 1,
; EG-NEXT: -127(nan), 31(4.344025e-44)
; EG-NEXT: ADD_INT T0.X, T1.W, literal.x,
; EG-NEXT: BIT_ALIGN_INT T1.Y, 0.0, PS, PV.W,
; EG-NEXT: AND_INT T2.Z, T3.W, literal.y, BS:VEC_201
; EG-NEXT: LSHL T3.W, T0.Z, PV.Z,
; EG-NEXT: SUB_INT * T1.W, literal.z, T1.W,
; EG-NEXT: -127(nan), 32(4.484155e-44)
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.X, PS, literal.x,
; EG-NEXT: BIT_ALIGN_INT T2.Y, 0.0, T0.Z, PS,
; EG-NEXT: AND_INT T0.Z, KC0[3].Z, literal.y,
; EG-NEXT: CNDE_INT T1.W, PV.Z, PV.Y, PV.W,
; EG-NEXT: SETGT_INT * T4.W, PV.X, literal.z,
; EG-NEXT: 32(4.484155e-44), 8388607(1.175494e-38)
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T2.X, PS, 0.0, PV.W,
; EG-NEXT: OR_INT T1.Y, PV.Z, literal.x,
; EG-NEXT: ADD_INT T0.Z, T2.W, literal.y,
; EG-NEXT: CNDE_INT T1.W, PV.X, PV.Y, 0.0,
; EG-NEXT: CNDE_INT * T3.W, T2.Z, T3.W, 0.0,
; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: CNDE_INT T1.X, T4.W, PV.W, PS,
; EG-NEXT: ASHR T2.Y, KC0[4].X, literal.x,
; EG-NEXT: AND_INT T1.Z, PV.Z, literal.x,
; EG-NEXT: NOT_INT T1.W, PV.Z,
; EG-NEXT: LSHR * T3.W, PV.Y, 1,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
; EG-NEXT: LSHL T3.Y, T1.Y, PV.Z,
; EG-NEXT: XOR_INT T1.Z, PV.X, PV.Y,
; EG-NEXT: XOR_INT T1.W, T2.X, PV.Y,
; EG-NEXT: SUB_INT * T2.W, literal.x, T2.W,
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: AND_INT T1.X, T0.Z, literal.x,
; EG-NEXT: AND_INT T4.Y, PS, literal.x,
; EG-NEXT: BIT_ALIGN_INT T0.Z, 0.0, T1.Y, PS, BS:VEC_021/SCL_122
; EG-NEXT: SUB_INT T1.W, PV.W, T2.Y,
; EG-NEXT: SUBB_UINT * T2.W, PV.Z, T2.Y,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: SUB_INT T2.X, PV.W, PS,
; EG-NEXT: CNDE_INT T1.Y, PV.Y, PV.Z, 0.0,
; EG-NEXT: CNDE_INT T0.Z, PV.X, T3.Y, 0.0,
; EG-NEXT: CNDE_INT T1.W, PV.X, T3.X, T3.Y, BS:VEC_021/SCL_122
; EG-NEXT: SETGT_INT * T2.W, T0.Y, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T1.X, KC0[3].W, literal.x, T0.W,
; EG-NEXT: AND_INT T3.Y, KC0[3].W, literal.y,
; EG-NEXT: CNDE_INT T2.Z, PS, 0.0, PV.W,
; EG-NEXT: CNDE_INT T1.W, PS, PV.Y, PV.Z,
; EG-NEXT: ASHR * T2.W, KC0[3].Z, literal.z,
; EG-NEXT: 23(3.222986e-44), 8388607(1.175494e-38)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BFE_UINT T3.X, KC0[3].Y, literal.x, T0.W,
; EG-NEXT: XOR_INT T1.Y, PV.W, PS,
; EG-NEXT: XOR_INT T0.Z, PV.Z, PS,
; EG-NEXT: OR_INT T0.W, PV.Y, literal.y,
; EG-NEXT: SUB_INT * T1.W, literal.z, PV.X,
; EG-NEXT: 23(3.222986e-44), 8388608(1.175494e-38)
; EG-NEXT: 150(2.101948e-43), 0(0.000000e+00)
; EG-NEXT: AND_INT T4.X, KC0[3].Y, literal.x,
; EG-NEXT: AND_INT T3.Y, PS, literal.y,
; EG-NEXT: BIT_ALIGN_INT T2.Z, 0.0, PV.W, PS,
; EG-NEXT: SUB_INT T1.W, PV.Z, T2.W,
; EG-NEXT: SUBB_UINT * T3.W, PV.Y, T2.W,
; EG-NEXT: 8388607(1.175494e-38), 32(4.484155e-44)
; EG-NEXT: SUB_INT T5.X, PV.W, PS,
; EG-NEXT: SETGT_INT T0.Y, 0.0, T0.Y,
; EG-NEXT: CNDE_INT T0.Z, PV.Y, PV.Z, 0.0,
; EG-NEXT: OR_INT T1.W, PV.X, literal.x,
; EG-NEXT: ADD_INT * T3.W, T3.X, literal.y,
; EG-NEXT: 8388608(1.175494e-38), -150(nan)
; EG-NEXT: ADD_INT T4.X, T3.X, literal.x,
; EG-NEXT: SUB_INT T3.Y, literal.y, T3.X,
; EG-NEXT: AND_INT T2.Z, PS, literal.z,
; EG-NEXT: NOT_INT T4.W, PS,
; EG-NEXT: LSHR * T5.W, PV.W, 1,
; EG-NEXT: -127(nan), 150(2.101948e-43)
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: BIT_ALIGN_INT T3.X, 0.0, PS, PV.W,
; EG-NEXT: LSHL T4.Y, T1.W, PV.Z,
; EG-NEXT: AND_INT T2.Z, T3.W, literal.x, BS:VEC_120/SCL_212
; EG-NEXT: BIT_ALIGN_INT T1.W, 0.0, T1.W, PV.Y, BS:VEC_021/SCL_122
; EG-NEXT: AND_INT * T3.W, PV.Y, literal.x,
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: ADD_INT T6.X, T1.X, literal.x,
; EG-NEXT: CNDE_INT T3.Y, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT * T3.Z, PV.Z, PV.Y, 0.0,
; EG-NEXT: -150(nan), 0(0.000000e+00)
; EG-NEXT: ALU clause starting at 108:
; EG-NEXT: CNDE_INT T1.W, T2.Z, T3.X, T4.Y,
; EG-NEXT: SETGT_INT * T3.W, T4.X, literal.x,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T3.X, PS, 0.0, PV.W,
; EG-NEXT: CNDE_INT T3.Y, PS, T3.Y, T3.Z,
; EG-NEXT: AND_INT T2.Z, T6.X, literal.x,
; EG-NEXT: NOT_INT T1.W, T6.X,
; EG-NEXT: LSHR * T3.W, T0.W, 1,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: ASHR T7.X, KC0[3].Y, literal.x,
; EG-NEXT: ADD_INT T4.Y, T1.X, literal.y,
; EG-NEXT: BIT_ALIGN_INT T3.Z, 0.0, PS, PV.W,
; EG-NEXT: LSHL T0.W, T0.W, PV.Z,
; EG-NEXT: AND_INT * T1.W, T6.X, literal.z,
; EG-NEXT: 31(4.344025e-44), -127(nan)
; EG-NEXT: 32(4.484155e-44), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T1.X, PS, PV.W, 0.0,
; EG-NEXT: CNDE_INT T5.Y, PS, PV.Z, PV.W,
; EG-NEXT: SETGT_INT T2.Z, PV.Y, literal.x,
; EG-NEXT: XOR_INT T0.W, T3.Y, PV.X,
; EG-NEXT: XOR_INT * T1.W, T3.X, PV.X,
; EG-NEXT: 23(3.222986e-44), 0(0.000000e+00)
; EG-NEXT: SUB_INT T3.X, PS, T7.X,
; EG-NEXT: SUBB_UINT T3.Y, PV.W, T7.X,
; EG-NEXT: CNDE_INT T3.Z, PV.Z, 0.0, PV.Y,
; EG-NEXT: CNDE_INT T1.W, PV.Z, T0.Z, PV.X,
; EG-NEXT: ASHR * T3.W, KC0[3].W, literal.x,
; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
; EG-NEXT: XOR_INT T1.X, PV.W, PS,
; EG-NEXT: XOR_INT T5.Y, PV.Z, PS,
; EG-NEXT: SUB_INT T0.Z, PV.X, PV.Y,
; EG-NEXT: SETGT_INT T1.W, 0.0, T4.X, BS:VEC_021/SCL_122
; EG-NEXT: CNDE_INT * T6.W, T0.Y, T5.X, 0.0,
; EG-NEXT: SETGT_INT T0.X, 0.0, T0.X,
; EG-NEXT: CNDE_INT T6.Y, PV.W, PV.Z, 0.0,
; EG-NEXT: SUB_INT T0.Z, T1.Y, T2.W, BS:VEC_021/SCL_122
; EG-NEXT: SUB_INT T2.W, PV.Y, T3.W,
; EG-NEXT: SUBB_UINT * T4.W, PV.X, T3.W,
; EG-NEXT: SUB_INT T3.X, PV.W, PS,
; EG-NEXT: SETGT_INT T1.Y, 0.0, T4.Y,
; EG-NEXT: CNDE_INT T6.Z, T0.Y, PV.Z, 0.0,
; EG-NEXT: SUB_INT T0.W, T0.W, T7.X, BS:VEC_021/SCL_122
; EG-NEXT: CNDE_INT * T4.W, PV.X, T2.X, 0.0,
; EG-NEXT: CNDE_INT T6.X, T1.W, PV.W, 0.0,
; EG-NEXT: CNDE_INT T4.Y, PV.Y, PV.X, 0.0,
; EG-NEXT: SUB_INT T0.W, T1.Z, T2.Y,
; EG-NEXT: LSHR * T2.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
; EG-NEXT: CNDE_INT T4.Z, T0.X, PV.W, 0.0,
; EG-NEXT: SUB_INT * T0.W, T1.X, T3.W, BS:VEC_120/SCL_212
; EG-NEXT: CNDE_INT T4.X, T1.Y, PV.W, 0.0,
; EG-NEXT: ADD_INT * T0.W, KC0[2].Y, literal.x,
; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
; EG-NEXT: LSHR * T0.X, PV.W, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi <4 x float> %x to <4 x i64>
  store <4 x i64> %conv, <4 x i64> addrspace(1)* %out
  ret void
}

; f32 -> i1 via fptosi.
; The checks expect no conversion instruction at all: the input is compared
; for equality with -1.0, the compare result is materialised as 0 or 1, and a
; single byte is stored.
; NOTE(review): the kernel name says "fp_to_uint" although the IR uses
; fptosi; this looks like a leftover from the unsigned counterpart of this
; test. The name is kept because the label checks depend on it - confirm
; before renaming.
define amdgpu_kernel void @fp_to_uint_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
; SI-LABEL: fp_to_uint_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, s4
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_uint_f32_to_i1:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, s4
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_uint_f32_to_i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: SETE_DX10 * T1.W, KC0[2].Z, literal.y,
; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
; EG-NEXT: AND_INT T1.W, PS, 1,
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, PV.W, PS,
; EG-NEXT: LSHL * T0.W, literal.x, PS,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %conv = fptosi float %in to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}

; f32 -> i1 via fptosi, with the input first passed through llvm.fabs.f32.
; The checks expect the same compare-with--1.0 pattern as the kernel above,
; with the absolute value carried as a |src| operand on the compare.
; NOTE(review): same "fp_to_uint" naming mismatch as the previous kernel.
define amdgpu_kernel void @fp_to_uint_fabs_f32_to_i1(i1 addrspace(1)* %out, float %in) #0 {
; SI-LABEL: fp_to_uint_fabs_f32_to_i1:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, |s4|
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_uint_fabs_f32_to_i1:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s4, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cmp_eq_f32_e64 s[4:5], -1.0, |s4|
; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; VI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_uint_fabs_f32_to_i1:
; EG: ; %bb.0:
; EG-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: AND_INT T0.W, KC0[2].Y, literal.x,
; EG-NEXT: SETE_DX10 * T1.W, |KC0[2].Z|, literal.y,
; EG-NEXT: 3(4.203895e-45), -1082130432(-1.000000e+00)
; EG-NEXT: AND_INT T1.W, PS, 1,
; EG-NEXT: LSHL * T0.W, PV.W, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: LSHL T0.X, PV.W, PS,
; EG-NEXT: LSHL * T0.W, literal.x, PS,
; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %in.fabs = call float @llvm.fabs.f32(float %in)
  %conv = fptosi float %in.fabs to i1
  store i1 %conv, i1 addrspace(1)* %out
  ret void
}

; f32 -> i16.
; The amdgcn checks expect the ordinary 32-bit conversion followed by a
; 16-bit (short) store; the r600 checks expect the converted value to be
; masked with 65535 and written with a MEM_RAT MSKOR store.
define amdgpu_kernel void @fp_to_sint_f32_i16(i16 addrspace(1)* %out, float %in) #0 {
; SI-LABEL: fp_to_sint_f32_i16:
; SI: ; %bb.0:
; SI-NEXT: s_load_dword s4, s[0:1], 0xb
; SI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_i32_f32_e32 v0, s4
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: fp_to_sint_f32_i16:
; VI: ; %bb.0:
; VI-NEXT: s_load_dword s2, s[0:1], 0x2c
; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
; VI-NEXT: s_mov_b32 s3, 0xf000
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: v_cvt_i32_f32_e32 v0, s2
; VI-NEXT: s_mov_b32 s2, -1
; VI-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-NEXT: s_endpgm
;
; EG-LABEL: fp_to_sint_f32_i16:
; EG: ; %bb.0:
; EG-NEXT: ALU 13, @4, KC0[CB0:0-32], KC1[]
; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
; EG-NEXT: CF_END
; EG-NEXT: PAD
; EG-NEXT: ALU clause starting at 4:
; EG-NEXT: TRUNC T0.W, KC0[2].Z,
; EG-NEXT: AND_INT * T1.W, KC0[2].Y, literal.x,
; EG-NEXT: 3(4.203895e-45), 0(0.000000e+00)
; EG-NEXT: FLT_TO_INT * T0.W, PV.W,
; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
; EG-NEXT: LSHL * T1.W, T1.W, literal.y,
; EG-NEXT: 65535(9.183409e-41), 3(4.203895e-45)
; EG-NEXT: LSHL T0.X, PV.W, PS,
; EG-NEXT: LSHL * T0.W, literal.x, PS,
; EG-NEXT: 65535(9.183409e-41), 0(0.000000e+00)
; EG-NEXT: MOV T0.Y, 0.0,
; EG-NEXT: MOV * T0.Z, 0.0,
; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
  %sint = fptosi float %in to i16
  store i16 %sint, i16 addrspace(1)* %out
  ret void
}

; #0 is attached to the i1 and i16 kernels above; #1 to the fabs declaration.
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }