1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s 3 4; 64-bit divides and rems should be split into a fast and slow path 5; where the fast path uses a 32-bit operation. 6 7define i64 @sdiv64(i64 %a, i64 %b) { 8; GFX9-LABEL: sdiv64: 9; GFX9: ; %bb.0: 10; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 12; GFX9-NEXT: v_mov_b32_e32 v4, 0 13; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 14; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 15; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 16; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 17; GFX9-NEXT: s_cbranch_execz .LBB0_2 18; GFX9-NEXT: ; %bb.1: 19; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3 20; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9 21; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc 22; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9 23; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9 24; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11 25; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10 26; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11 27; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc 28; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 29; GFX9-NEXT: v_rcp_f32_e32 v2, v2 30; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 31; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 32; GFX9-NEXT: v_trunc_f32_e32 v3, v3 33; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 34; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 35; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3 36; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 37; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0 38; GFX9-NEXT: v_mul_lo_u32 v5, v7, v12 39; GFX9-NEXT: v_mul_hi_u32 v13, v6, v2 40; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4 41; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 42; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3 43; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0 44; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v4, vcc 45; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0 46; 
GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 47; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v14, v3, vcc 48; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc 49; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 50; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc 51; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v6, v2 52; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v3, vcc 53; GFX9-NEXT: v_mul_lo_u32 v4, v7, v12 54; GFX9-NEXT: v_mul_lo_u32 v5, v8, v13 55; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0 56; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 57; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0 58; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0 59; GFX9-NEXT: v_mul_hi_u32 v14, v13, v2 60; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0 61; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v5 62; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 63; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 64; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc 65; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc 66; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 67; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc 68; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2 69; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v12, v3, vcc 70; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v1 71; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v4 72; GFX9-NEXT: v_xor_b32_e32 v6, v0, v4 73; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v1, v4, vcc 74; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v3, 0 75; GFX9-NEXT: v_mul_hi_u32 v7, v6, v2 76; GFX9-NEXT: v_xor_b32_e32 v5, v5, v4 77; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v0 78; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc 79; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v2, 0 80; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v5, v3, 0 81; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 82; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc 83; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc 84; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2 85; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc 86; 
GFX9-NEXT: v_mul_lo_u32 v7, v10, v2 87; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3 88; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0 89; GFX9-NEXT: v_add3_u32 v1, v1, v8, v7 90; GFX9-NEXT: v_sub_u32_e32 v7, v5, v1 91; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v6, v0 92; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v7, v10, vcc 93; GFX9-NEXT: v_sub_co_u32_e64 v7, s[4:5], v0, v11 94; GFX9-NEXT: v_subbrev_co_u32_e64 v6, s[4:5], 0, v6, s[4:5] 95; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v6, v10 96; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 97; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v7, v11 98; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[4:5] 99; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v6, v10 100; GFX9-NEXT: v_cndmask_b32_e64 v6, v8, v7, s[4:5] 101; GFX9-NEXT: v_add_co_u32_e64 v7, s[4:5], 2, v2 102; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v5, v1, vcc 103; GFX9-NEXT: v_addc_co_u32_e64 v8, s[4:5], 0, v3, s[4:5] 104; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10 105; GFX9-NEXT: v_add_co_u32_e64 v12, s[4:5], 1, v2 106; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 107; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11 108; GFX9-NEXT: v_addc_co_u32_e64 v13, s[4:5], 0, v3, s[4:5] 109; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 110; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10 111; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v6 112; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc 113; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 114; GFX9-NEXT: v_cndmask_b32_e64 v1, v12, v7, s[4:5] 115; GFX9-NEXT: v_cndmask_b32_e64 v6, v13, v8, s[4:5] 116; GFX9-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc 117; GFX9-NEXT: v_xor_b32_e32 v2, v4, v9 118; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v6, vcc 119; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2 120; GFX9-NEXT: v_xor_b32_e32 v0, v0, v2 121; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v1, v2 122; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v0, v2, vcc 123; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 124; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 125; GFX9-NEXT: .LBB0_2: ; %Flow 126; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 
127; GFX9-NEXT: s_cbranch_execz .LBB0_4 128; GFX9-NEXT: ; %bb.3: 129; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 130; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 131; GFX9-NEXT: v_mov_b32_e32 v5, 0 132; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 133; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 134; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 135; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 136; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 137; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 138; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 139; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 140; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 141; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 142; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 143; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 144; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 145; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 146; GFX9-NEXT: v_add_u32_e32 v3, 1, v1 147; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 148; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc 149; GFX9-NEXT: .LBB0_4: 150; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 151; GFX9-NEXT: v_mov_b32_e32 v0, v4 152; GFX9-NEXT: v_mov_b32_e32 v1, v5 153; GFX9-NEXT: s_setpc_b64 s[30:31] 154 %d = sdiv i64 %a, %b 155 ret i64 %d 156} 157 158define i64 @udiv64(i64 %a, i64 %b) { 159; GFX9-LABEL: udiv64: 160; GFX9: ; %bb.0: 161; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 162; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 163; GFX9-NEXT: v_mov_b32_e32 v4, 0 164; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 165; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 166; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 167; GFX9-NEXT: s_xor_b64 s[6:7], exec, s[4:5] 168; GFX9-NEXT: s_cbranch_execz .LBB1_2 169; GFX9-NEXT: ; %bb.1: 170; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2 171; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 172; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 173; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 174; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 175; GFX9-NEXT: v_rcp_f32_e32 v4, v4 176; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 177; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 178; 
GFX9-NEXT: v_trunc_f32_e32 v5, v5 179; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 180; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 181; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 182; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 183; GFX9-NEXT: v_mul_lo_u32 v7, v11, v9 184; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 185; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 186; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 187; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0 188; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 189; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0 190; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v6, vcc 191; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0 192; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v12, v4 193; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v5, vcc 194; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 195; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 196; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 197; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v9, v4 198; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, v8, v5, vcc 199; GFX9-NEXT: v_mul_lo_u32 v6, v10, v13 200; GFX9-NEXT: v_mul_lo_u32 v7, v11, v12 201; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0 202; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 203; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0 204; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0 205; GFX9-NEXT: v_mul_hi_u32 v11, v12, v4 206; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, 0 207; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v11, v7 208; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc 209; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 210; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc 211; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc 212; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5 213; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 214; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 215; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v13, v5, vcc 216; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0 217; GFX9-NEXT: v_mul_hi_u32 v8, v0, v6 
218; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v4 219; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v5, vcc 220; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0 221; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0 222; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v8, v4 223; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v5, vcc 224; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 225; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v4, v6 226; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v5, vcc 227; GFX9-NEXT: v_mul_lo_u32 v8, v3, v6 228; GFX9-NEXT: v_mul_lo_u32 v9, v2, v7 229; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v6, 0 230; GFX9-NEXT: v_add3_u32 v5, v5, v9, v8 231; GFX9-NEXT: v_sub_u32_e32 v8, v1, v5 232; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4 233; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v8, v3, vcc 234; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v0, v2 235; GFX9-NEXT: v_subbrev_co_u32_e64 v4, s[4:5], 0, v4, s[4:5] 236; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v4, v3 237; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[4:5] 238; GFX9-NEXT: v_cmp_ge_u32_e64 s[4:5], v8, v2 239; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[4:5] 240; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], v4, v3 241; GFX9-NEXT: v_cndmask_b32_e64 v4, v9, v8, s[4:5] 242; GFX9-NEXT: v_add_co_u32_e64 v8, s[4:5], 2, v6 243; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc 244; GFX9-NEXT: v_addc_co_u32_e64 v9, s[4:5], 0, v7, s[4:5] 245; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 246; GFX9-NEXT: v_add_co_u32_e64 v10, s[4:5], 1, v6 247; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 248; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 249; GFX9-NEXT: v_addc_co_u32_e64 v11, s[4:5], 0, v7, s[4:5] 250; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, -1, vcc 251; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 252; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v4 253; GFX9-NEXT: v_cndmask_b32_e32 v0, v5, v0, vcc 254; GFX9-NEXT: v_cndmask_b32_e64 v4, v11, v9, s[4:5] 255; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 256; GFX9-NEXT: v_cndmask_b32_e64 v0, v10, v8, s[4:5] 257; GFX9-NEXT: 
v_cndmask_b32_e32 v5, v7, v4, vcc 258; GFX9-NEXT: v_cndmask_b32_e32 v4, v6, v0, vcc 259; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 260; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 261; GFX9-NEXT: .LBB1_2: ; %Flow 262; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[6:7] 263; GFX9-NEXT: s_cbranch_execz .LBB1_4 264; GFX9-NEXT: ; %bb.3: 265; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 266; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 267; GFX9-NEXT: v_mov_b32_e32 v5, 0 268; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 269; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 270; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 271; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 272; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 273; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 274; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 275; GFX9-NEXT: v_mul_lo_u32 v3, v1, v2 276; GFX9-NEXT: v_add_u32_e32 v4, 1, v1 277; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 278; GFX9-NEXT: v_sub_u32_e32 v3, v0, v2 279; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 280; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 281; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 282; GFX9-NEXT: v_add_u32_e32 v3, 1, v1 283; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 284; GFX9-NEXT: v_cndmask_b32_e32 v4, v1, v3, vcc 285; GFX9-NEXT: .LBB1_4: 286; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 287; GFX9-NEXT: v_mov_b32_e32 v0, v4 288; GFX9-NEXT: v_mov_b32_e32 v1, v5 289; GFX9-NEXT: s_setpc_b64 s[30:31] 290 %d = udiv i64 %a, %b 291 ret i64 %d 292} 293 294define i64 @srem64(i64 %a, i64 %b) { 295; GFX9-LABEL: srem64: 296; GFX9: ; %bb.0: 297; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 298; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 299; GFX9-NEXT: v_mov_b32_e32 v4, 0 300; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 301; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 302; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 303; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 304; GFX9-NEXT: s_cbranch_execz .LBB2_2 305; GFX9-NEXT: ; %bb.1: 306; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v3 307; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 308; GFX9-NEXT: 
v_addc_co_u32_e32 v3, vcc, v3, v4, vcc 309; GFX9-NEXT: v_xor_b32_e32 v9, v3, v4 310; GFX9-NEXT: v_xor_b32_e32 v10, v2, v4 311; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v10 312; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v9 313; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v10 314; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v9, vcc 315; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3 316; GFX9-NEXT: v_rcp_f32_e32 v2, v2 317; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2 318; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2 319; GFX9-NEXT: v_trunc_f32_e32 v3, v3 320; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3 321; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2 322; GFX9-NEXT: v_cvt_u32_f32_e32 v11, v3 323; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6 324; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0 325; GFX9-NEXT: v_mul_lo_u32 v5, v7, v11 326; GFX9-NEXT: v_mul_hi_u32 v12, v6, v2 327; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4 328; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0 329; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v3 330; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v11, v2, 0 331; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v4, vcc 332; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v11, v5, 0 333; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v12, v2 334; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v13, v3, vcc 335; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc 336; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4 337; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc 338; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v6, v2 339; GFX9-NEXT: v_addc_co_u32_e32 v11, vcc, v11, v3, vcc 340; GFX9-NEXT: v_mul_lo_u32 v4, v7, v11 341; GFX9-NEXT: v_mul_lo_u32 v5, v8, v12 342; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v12, 0 343; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5 344; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v11, v5, 0 345; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v5, 0 346; GFX9-NEXT: v_mul_hi_u32 v13, v12, v2 347; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v11, v2, 0 348; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v5 349; GFX9-NEXT: 
v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 350; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7 351; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc 352; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc 353; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3 354; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc 355; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v12, v2 356; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v11, v3, vcc 357; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v1 358; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v5 359; GFX9-NEXT: v_xor_b32_e32 v6, v0, v5 360; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v5, vcc 361; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v3, 0 362; GFX9-NEXT: v_mul_hi_u32 v7, v6, v2 363; GFX9-NEXT: v_xor_b32_e32 v4, v4, v5 364; GFX9-NEXT: v_add_co_u32_e32 v7, vcc, v7, v0 365; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc 366; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0 367; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0 368; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v7, v0 369; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc 370; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc 371; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2 372; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc 373; GFX9-NEXT: v_mul_lo_u32 v2, v9, v0 374; GFX9-NEXT: v_mul_lo_u32 v3, v10, v1 375; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v10, v0, 0 376; GFX9-NEXT: v_add3_u32 v1, v1, v3, v2 377; GFX9-NEXT: v_sub_u32_e32 v2, v4, v1 378; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v6, v0 379; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v9, vcc 380; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v0, v10 381; GFX9-NEXT: v_subbrev_co_u32_e64 v6, s[6:7], 0, v2, s[4:5] 382; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v9 383; GFX9-NEXT: v_cndmask_b32_e64 v7, 0, -1, s[6:7] 384; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v3, v10 385; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 386; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v6, v9 387; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v2, v9, s[4:5] 388; GFX9-NEXT: v_cndmask_b32_e64 v7, 
v7, v8, s[6:7] 389; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v3, v10 390; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc 391; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5] 392; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v9 393; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v7 394; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc 395; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v10 396; GFX9-NEXT: v_cndmask_b32_e64 v2, v6, v2, s[4:5] 397; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc 398; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v9 399; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 400; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4 401; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 402; GFX9-NEXT: v_cndmask_b32_e64 v2, v3, v8, s[4:5] 403; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 404; GFX9-NEXT: v_xor_b32_e32 v0, v0, v5 405; GFX9-NEXT: v_xor_b32_e32 v1, v1, v5 406; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v5 407; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v5, vcc 408; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 409; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 410; GFX9-NEXT: .LBB2_2: ; %Flow 411; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] 412; GFX9-NEXT: s_cbranch_execz .LBB2_4 413; GFX9-NEXT: ; %bb.3: 414; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 415; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 416; GFX9-NEXT: v_mov_b32_e32 v5, 0 417; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 418; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 419; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 420; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 421; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 422; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 423; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 424; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 425; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1 426; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 427; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 428; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 429; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 430; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 431; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 432; GFX9-NEXT: .LBB2_4: 433; GFX9-NEXT: 
s_or_b64 exec, exec, s[4:5] 434; GFX9-NEXT: v_mov_b32_e32 v0, v4 435; GFX9-NEXT: v_mov_b32_e32 v1, v5 436; GFX9-NEXT: s_setpc_b64 s[30:31] 437 %d = srem i64 %a, %b 438 ret i64 %d 439} 440 441define i64 @urem64(i64 %a, i64 %b) { 442; GFX9-LABEL: urem64: 443; GFX9: ; %bb.0: 444; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445; GFX9-NEXT: v_or_b32_e32 v5, v1, v3 446; GFX9-NEXT: v_mov_b32_e32 v4, 0 447; GFX9-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[4:5] 448; GFX9-NEXT: ; implicit-def: $vgpr4_vgpr5 449; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc 450; GFX9-NEXT: s_xor_b64 s[8:9], exec, s[4:5] 451; GFX9-NEXT: s_cbranch_execz .LBB3_2 452; GFX9-NEXT: ; %bb.1: 453; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v2 454; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3 455; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2 456; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc 457; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5 458; GFX9-NEXT: v_rcp_f32_e32 v4, v4 459; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4 460; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4 461; GFX9-NEXT: v_trunc_f32_e32 v5, v5 462; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5 463; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5 464; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4 465; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8 466; GFX9-NEXT: v_mul_lo_u32 v7, v11, v9 467; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0 468; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 469; GFX9-NEXT: v_mul_hi_u32 v12, v9, v4 470; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0 471; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, v12, v5 472; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0 473; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, 0, v6, vcc 474; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0 475; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v12, v4 476; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v13, v5, vcc 477; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 478; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 479; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 480; GFX9-NEXT: v_add_co_u32_e32 v12, vcc, 
v9, v4 481; GFX9-NEXT: v_addc_co_u32_e32 v13, vcc, v8, v5, vcc 482; GFX9-NEXT: v_mul_lo_u32 v6, v10, v13 483; GFX9-NEXT: v_mul_lo_u32 v7, v11, v12 484; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0 485; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7 486; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0 487; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0 488; GFX9-NEXT: v_mul_hi_u32 v11, v12, v4 489; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v13, v4, 0 490; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v11, v7 491; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc 492; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9 493; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc 494; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc 495; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5 496; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc 497; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v12, v4 498; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, v13, v5, vcc 499; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0 500; GFX9-NEXT: v_mul_hi_u32 v8, v0, v6 501; GFX9-NEXT: v_add_co_u32_e32 v8, vcc, v8, v4 502; GFX9-NEXT: v_addc_co_u32_e32 v9, vcc, 0, v5, vcc 503; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0 504; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0 505; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v8, v4 506; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v9, v5, vcc 507; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc 508; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6 509; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc 510; GFX9-NEXT: v_mul_lo_u32 v6, v3, v4 511; GFX9-NEXT: v_mul_lo_u32 v7, v2, v5 512; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v2, v4, 0 513; GFX9-NEXT: v_add3_u32 v5, v5, v7, v6 514; GFX9-NEXT: v_sub_u32_e32 v6, v1, v5 515; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v0, v4 516; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v6, v3, vcc 517; GFX9-NEXT: v_sub_co_u32_e64 v6, s[4:5], v0, v2 518; GFX9-NEXT: v_subbrev_co_u32_e64 v7, s[6:7], 0, v4, s[4:5] 519; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v7, v3 520; 
GFX9-NEXT: v_cndmask_b32_e64 v8, 0, -1, s[6:7] 521; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v6, v2 522; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v1, v5, vcc 523; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, -1, s[6:7] 524; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v7, v3 525; GFX9-NEXT: v_subb_co_u32_e64 v4, s[4:5], v4, v3, s[4:5] 526; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v3 527; GFX9-NEXT: v_cndmask_b32_e64 v8, v8, v9, s[6:7] 528; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v6, v2 529; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, vcc 530; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 531; GFX9-NEXT: v_subbrev_co_u32_e64 v4, s[4:5], 0, v4, s[4:5] 532; GFX9-NEXT: v_cndmask_b32_e64 v2, 0, -1, vcc 533; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v3 534; GFX9-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v8 535; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 536; GFX9-NEXT: v_cndmask_b32_e64 v4, v7, v4, s[4:5] 537; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v2 538; GFX9-NEXT: v_cndmask_b32_e32 v5, v1, v4, vcc 539; GFX9-NEXT: v_cndmask_b32_e64 v1, v6, v9, s[4:5] 540; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 541; GFX9-NEXT: ; implicit-def: $vgpr2_vgpr3 542; GFX9-NEXT: ; implicit-def: $vgpr0_vgpr1 543; GFX9-NEXT: .LBB3_2: ; %Flow 544; GFX9-NEXT: s_andn2_saveexec_b64 s[4:5], s[8:9] 545; GFX9-NEXT: s_cbranch_execz .LBB3_4 546; GFX9-NEXT: ; %bb.3: 547; GFX9-NEXT: v_cvt_f32_u32_e32 v1, v2 548; GFX9-NEXT: v_sub_u32_e32 v3, 0, v2 549; GFX9-NEXT: v_mov_b32_e32 v5, 0 550; GFX9-NEXT: v_rcp_iflag_f32_e32 v1, v1 551; GFX9-NEXT: v_mul_f32_e32 v1, 0x4f7ffffe, v1 552; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1 553; GFX9-NEXT: v_mul_lo_u32 v3, v3, v1 554; GFX9-NEXT: v_mul_hi_u32 v3, v1, v3 555; GFX9-NEXT: v_add_u32_e32 v1, v1, v3 556; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 557; GFX9-NEXT: v_mul_lo_u32 v1, v1, v2 558; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1 559; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 560; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v2 561; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 562; GFX9-NEXT: v_sub_u32_e32 v1, v0, v2 563; GFX9-NEXT: 
v_cmp_ge_u32_e32 vcc, v0, v2 564; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc 565; GFX9-NEXT: .LBB3_4: 566; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] 567; GFX9-NEXT: v_mov_b32_e32 v0, v4 568; GFX9-NEXT: v_mov_b32_e32 v1, v5 569; GFX9-NEXT: s_setpc_b64 s[30:31] 570 %d = urem i64 %a, %b 571 ret i64 %d 572} 573 574define i32 @sdiv32(i32 %a, i32 %b) { 575; GFX9-LABEL: sdiv32: 576; GFX9: ; %bb.0: 577; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 578; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1 579; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 580; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2 581; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v1 582; GFX9-NEXT: v_sub_u32_e32 v4, 0, v1 583; GFX9-NEXT: v_ashrrev_i32_e32 v5, 31, v0 584; GFX9-NEXT: v_add_u32_e32 v0, v0, v5 585; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 586; GFX9-NEXT: v_xor_b32_e32 v0, v0, v5 587; GFX9-NEXT: v_xor_b32_e32 v2, v5, v2 588; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 589; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 590; GFX9-NEXT: v_mul_lo_u32 v4, v4, v3 591; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 592; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 593; GFX9-NEXT: v_mul_hi_u32 v3, v0, v3 594; GFX9-NEXT: v_mul_lo_u32 v4, v3, v1 595; GFX9-NEXT: v_add_u32_e32 v5, 1, v3 596; GFX9-NEXT: v_sub_u32_e32 v0, v0, v4 597; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 598; GFX9-NEXT: v_sub_u32_e32 v4, v0, v1 599; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 600; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 601; GFX9-NEXT: v_add_u32_e32 v4, 1, v3 602; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 603; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v4, vcc 604; GFX9-NEXT: v_xor_b32_e32 v0, v0, v2 605; GFX9-NEXT: v_sub_u32_e32 v0, v0, v2 606; GFX9-NEXT: s_setpc_b64 s[30:31] 607 %d = sdiv i32 %a, %b 608 ret i32 %d 609} 610 611define i32 @udiv32(i32 %a, i32 %b) { 612; GFX9-LABEL: udiv32: 613; GFX9: ; %bb.0: 614; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 615; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v1 616; GFX9-NEXT: v_sub_u32_e32 v3, 0, v1 617; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 618; 
GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 619; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 620; GFX9-NEXT: v_mul_lo_u32 v3, v3, v2 621; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 622; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 623; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 624; GFX9-NEXT: v_mul_lo_u32 v3, v2, v1 625; GFX9-NEXT: v_add_u32_e32 v4, 1, v2 626; GFX9-NEXT: v_sub_u32_e32 v0, v0, v3 627; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 628; GFX9-NEXT: v_sub_u32_e32 v3, v0, v1 629; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 630; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 631; GFX9-NEXT: v_add_u32_e32 v3, 1, v2 632; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 633; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 634; GFX9-NEXT: s_setpc_b64 s[30:31] 635 %d = udiv i32 %a, %b 636 ret i32 %d 637} 638 639define i32 @srem32(i32 %a, i32 %b) { 640; GFX9-LABEL: srem32: 641; GFX9: ; %bb.0: 642; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 643; GFX9-NEXT: v_ashrrev_i32_e32 v2, 31, v1 644; GFX9-NEXT: v_add_u32_e32 v1, v1, v2 645; GFX9-NEXT: v_xor_b32_e32 v1, v1, v2 646; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v1 647; GFX9-NEXT: v_sub_u32_e32 v3, 0, v1 648; GFX9-NEXT: v_ashrrev_i32_e32 v4, 31, v0 649; GFX9-NEXT: v_add_u32_e32 v0, v0, v4 650; GFX9-NEXT: v_rcp_iflag_f32_e32 v2, v2 651; GFX9-NEXT: v_xor_b32_e32 v0, v0, v4 652; GFX9-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 653; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2 654; GFX9-NEXT: v_mul_lo_u32 v3, v3, v2 655; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 656; GFX9-NEXT: v_add_u32_e32 v2, v2, v3 657; GFX9-NEXT: v_mul_hi_u32 v2, v0, v2 658; GFX9-NEXT: v_mul_lo_u32 v2, v2, v1 659; GFX9-NEXT: v_sub_u32_e32 v0, v0, v2 660; GFX9-NEXT: v_sub_u32_e32 v2, v0, v1 661; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 662; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 663; GFX9-NEXT: v_sub_u32_e32 v2, v0, v1 664; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v1 665; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 666; GFX9-NEXT: v_xor_b32_e32 v0, v0, v4 667; GFX9-NEXT: v_sub_u32_e32 v0, v0, v4 668; GFX9-NEXT: 
s_setpc_b64 s[30:31]
  %d = srem i32 %a, %b
  ret i32 %d
}

; 32-bit unsigned remainder. The expected code below is straight-line: no
; exec-mask branch is emitted for the 32-bit case.
define i32 @urem32(i32 %a, i32 %b) {
; GFX9-LABEL: urem32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v1
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v1
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x4f7ffffe, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v2
; GFX9-NEXT:    v_mul_hi_u32 v3, v2, v3
; GFX9-NEXT:    v_add_u32_e32 v2, v2, v3
; GFX9-NEXT:    v_mul_hi_u32 v2, v0, v2
; GFX9-NEXT:    v_mul_lo_u32 v2, v2, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v2
; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_sub_u32_e32 v2, v0, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v1
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = urem i32 %a, %b
  ret i32 %d
}

; Signed 64-bit quotient and remainder of the same operands, returned together
; as the two lanes of a <2 x i64>. The expected code branches on whether the
; OR of v1 and v3 is non-zero: %bb.1 is the full 64-bit expansion and %bb.3 is
; the 32-bit path that also zeroes the high result words.
define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; GFX9-LABEL: sdivrem64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v5, v1, v3
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX9-NEXT:    ; implicit-def: $vgpr6_vgpr7
; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB8_2
; GFX9-NEXT:  ; %bb.1:
; GFX9-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v9
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v9, vcc
; GFX9-NEXT:    v_xor_b32_e32 v10, v3, v9
; GFX9-NEXT:    v_xor_b32_e32 v11, v2, v9
; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v11
; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v10
; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, 0, v11
; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
; GFX9-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
; GFX9-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
; GFX9-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v12, v3
; GFX9-NEXT:    v_mul_lo_u32 v4, v8, v6
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
; GFX9-NEXT:    v_mul_lo_u32 v5, v7, v12
; GFX9-NEXT:    v_mul_hi_u32 v13, v6, v2
; GFX9-NEXT:    v_add3_u32 v5, v3, v5, v4
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v3
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v4, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v14, v3, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v5, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v6, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v12, v3, vcc
; GFX9-NEXT:    v_mul_lo_u32 v4, v7, v12
; GFX9-NEXT:    v_mul_lo_u32 v5, v8, v13
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0
; GFX9-NEXT:    v_add3_u32 v5, v3, v4, v5
; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0
; GFX9-NEXT:    v_mul_hi_u32 v14, v13, v2
; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v14, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v5, v8, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v12, v3, vcc
; GFX9-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v7
; GFX9-NEXT:    v_xor_b32_e32 v5, v0, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v1, v7, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0
; GFX9-NEXT:    v_mul_hi_u32 v6, v5, v2
; GFX9-NEXT:    v_xor_b32_e32 v4, v4, v7
; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v8, v1, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v2
; GFX9-NEXT:    v_mul_lo_u32 v8, v11, v3
; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0
; GFX9-NEXT:    v_add3_u32 v1, v1, v8, v6
; GFX9-NEXT:    v_sub_u32_e32 v6, v4, v1
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v5, v0
; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[4:5], v6, v10, vcc
; GFX9-NEXT:    v_sub_co_u32_e64 v8, s[4:5], v0, v11
; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[6:7], 0, v6, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v8, v11
; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], v12, v10
; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
; GFX9-NEXT:    v_add_co_u32_e64 v13, s[6:7], 2, v2
; GFX9-NEXT:    v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7]
; GFX9-NEXT:    v_add_co_u32_e64 v15, s[6:7], 1, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v4, v1, vcc
; GFX9-NEXT:    v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7]
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v5
; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v11
; GFX9-NEXT:    v_cndmask_b32_e64 v5, v16, v14, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v10
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v14, vcc
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT:    v_cndmask_b32_e64 v4, v15, v13, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
; GFX9-NEXT:    v_xor_b32_e32 v5, v7, v9
; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v5
; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v5
; GFX9-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v2, v5
; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5]
; GFX9-NEXT:    v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9]
; GFX9-NEXT:    v_sub_co_u32_e64 v3, s[4:5], v8, v11
; GFX9-NEXT:    v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v2, v12, v2, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v2, v8, v3, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v7
; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v7
; GFX9-NEXT:    v_sub_co_u32_e32 v6, vcc, v0, v7
; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v1, v7, vcc
; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT:  .LBB8_2: ; %Flow
; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[10:11]
; GFX9-NEXT:    s_cbranch_execz .LBB8_4
; GFX9-NEXT:  ; %bb.3:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_mov_b32_e32 v7, v5
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v1
; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v1, v2
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v3
; GFX9-NEXT:    v_sub_u32_e32 v3, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX9-NEXT:    v_sub_u32_e32 v3, v0, v2
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v6, v0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v1, v4, vcc
; GFX9-NEXT:  .LBB8_4:
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v0, v4
; GFX9-NEXT:    v_mov_b32_e32 v1, v5
; GFX9-NEXT:    v_mov_b32_e32 v2, v6
; GFX9-NEXT:    v_mov_b32_e32 v3, v7
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = sdiv i64 %a, %b
  %r = srem i64 %a, %b
  ; Lane 0 is the quotient, lane 1 the remainder. The base vector is poison
  ; because both lanes are overwritten before the value is returned.
  %ins.0 = insertelement <2 x i64> poison, i64 %d, i32 0
  %ins.1 = insertelement <2 x i64> %ins.0, i64 %r, i32 1
  ret <2 x i64> %ins.1
}

; Unsigned counterpart of sdivrem64: 64-bit quotient and remainder of the same
; operands, returned as the two lanes of a <2 x i64>. The expected code uses
; the same two-path shape (%bb.1 full 64-bit expansion, %bb.3 32-bit path).
define <2 x i64> @udivrem64(i64 %a, i64 %b) {
; GFX9-LABEL: udivrem64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_or_b32_e32 v5, v1, v3
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u64_e32 vcc, 0, v[4:5]
; GFX9-NEXT:    ; implicit-def: $vgpr6_vgpr7
; GFX9-NEXT:    ; implicit-def: $vgpr4_vgpr5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_xor_b64 s[8:9], exec, s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB9_2
; GFX9-NEXT:  ; %bb.1:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, v2
; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT:    v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
; GFX9-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
; GFX9-NEXT:    v_rcp_f32_e32 v4, v4
; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
; GFX9-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v8
; GFX9-NEXT:    v_mul_lo_u32 v7, v11, v9
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v9, 0
; GFX9-NEXT:    v_add3_u32 v7, v5, v6, v7
; GFX9-NEXT:    v_mul_hi_u32 v12, v9, v4
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v12, v5
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v8, v4, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v13, vcc, 0, v6, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v8, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v12, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v13, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v7, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v6
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v12, vcc, v9, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v13, vcc, v8, v5, vcc
; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v13
; GFX9-NEXT:    v_mul_lo_u32 v7, v11, v12
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v12, 0
; GFX9-NEXT:    v_add3_u32 v7, v5, v6, v7
; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v7, 0
; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v7, 0
; GFX9-NEXT:    v_mul_hi_u32 v11, v12, v4
; GFX9-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v13, v4, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v11, v7
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v8, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v9
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v7, v10, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v5
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v12, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, v13, v5, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v0, v7, 0
; GFX9-NEXT:    v_mul_hi_u32 v8, v0, v6
; GFX9-NEXT:    v_add_co_u32_e32 v8, vcc, v8, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v9, vcc, 0, v5, vcc
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v1, v6, 0
; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v1, v7, 0
; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v8, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v9, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v7, vcc
; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v4, v6
; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v5, vcc
; GFX9-NEXT:    v_mul_lo_u32 v8, v3, v6
; GFX9-NEXT:    v_mul_lo_u32 v9, v2, v7
; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v2, v6, 0
; GFX9-NEXT:    v_add3_u32 v5, v5, v9, v8
; GFX9-NEXT:    v_sub_u32_e32 v8, v1, v5
; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v0, v4
; GFX9-NEXT:    v_subb_co_u32_e64 v8, s[4:5], v8, v3, vcc
; GFX9-NEXT:    v_sub_co_u32_e64 v9, s[4:5], v0, v2
; GFX9-NEXT:    v_subbrev_co_u32_e64 v10, s[6:7], 0, v8, s[4:5]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v10, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v11, 0, -1, s[6:7]
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], v10, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v4, v4, v11, s[6:7]
; GFX9-NEXT:    v_add_co_u32_e64 v11, s[6:7], 2, v6
; GFX9-NEXT:    v_addc_co_u32_e64 v12, s[6:7], 0, v7, s[6:7]
; GFX9-NEXT:    v_add_co_u32_e64 v13, s[6:7], 1, v6
; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v1, v5, vcc
; GFX9-NEXT:    v_addc_co_u32_e64 v14, s[6:7], 0, v7, s[6:7]
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v3
; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v4
; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, vcc
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e64 v4, v14, v12, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v3
; GFX9-NEXT:    v_subb_co_u32_e64 v3, s[4:5], v8, v3, s[4:5]
; GFX9-NEXT:    v_sub_co_u32_e64 v2, s[4:5], v9, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v12, vcc
; GFX9-NEXT:    v_subbrev_co_u32_e64 v3, s[4:5], 0, v3, s[4:5]
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
; GFX9-NEXT:    v_cndmask_b32_e64 v3, v10, v3, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e32 v5, v7, v4, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v4, v13, v11, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e32 v7, v1, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v9, v2, s[6:7]
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v6, v4, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v6, v0, v1, vcc
; GFX9-NEXT:    ; implicit-def: $vgpr2_vgpr3
; GFX9-NEXT:    ; implicit-def: $vgpr0_vgpr1
; GFX9-NEXT:  .LBB9_2: ; %Flow
; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[8:9]
; GFX9-NEXT:    s_cbranch_execz .LBB9_4
; GFX9-NEXT:  ; %bb.3:
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT:    v_sub_u32_e32 v3, 0, v2
; GFX9-NEXT:    v_mov_b32_e32 v5, 0
; GFX9-NEXT:    v_mov_b32_e32 v7, v5
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_f32_e32 v1, 0x4f7ffffe, v1
; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v3, v1
; GFX9-NEXT:    v_mul_hi_u32 v3, v1, v3
; GFX9-NEXT:    v_add_u32_e32 v1, v1, v3
; GFX9-NEXT:    v_mul_hi_u32 v1, v0, v1
; GFX9-NEXT:    v_mul_lo_u32 v3, v1, v2
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
; GFX9-NEXT:    v_sub_u32_e32 v0, v0, v3
; GFX9-NEXT:    v_sub_u32_e32 v3, v0, v2
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v4, vcc
; GFX9-NEXT:    v_sub_u32_e32 v3, v0, v2
; GFX9-NEXT:    v_add_u32_e32 v4, 1, v1
; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v2
; GFX9-NEXT:    v_cndmask_b32_e32 v6, v0, v3, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v1, v4, vcc
; GFX9-NEXT:  .LBB9_4:
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_mov_b32_e32 v0, v4
; GFX9-NEXT:    v_mov_b32_e32 v1, v5
; GFX9-NEXT:    v_mov_b32_e32 v2, v6
; GFX9-NEXT:    v_mov_b32_e32 v3, v7
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %d = udiv i64 %a, %b
  %r = urem i64 %a, %b
  ; Lane 0 is the quotient, lane 1 the remainder. The base vector is poison
  ; because both lanes are overwritten before the value is returned.
  %ins.0 = insertelement <2 x i64> poison, i64 %d, i32 0
  %ins.1 = insertelement <2 x i64> %ins.0, i64 %r, i32 1
  ret <2 x i64> %ins.1
}

; 64-bit divide whose operands are both produced by an arithmetic shift right
; of 32. The expected code is straight-line (no exec-mask branch).
; NOTE(review): the function is named "sdiv" but the IR below performs `udiv`
; on the ashr results. Confirm which opcode is intended before regenerating
; the assertions; the IR is left as-is so that the recorded checks still apply.
define i64 @sdiv64_known32(i64 %a, i64 %b) {
; GFX9-LABEL: sdiv64_known32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, v3
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v1
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v0
; GFX9-NEXT:    v_mul_f32_e32 v2, v1, v2
; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v2
; GFX9-NEXT:    v_mad_f32 v1, -v2, v0, v1
; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v1|, v0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
; GFX9-NEXT:    v_and_b32_e32 v0, 0x7fffffff, v0
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %a.ext = ashr i64 %a, 32
  %b.ext = ashr i64 %b, 32
  %d = udiv i64 %a.ext, %b.ext
  ret i64 %d
}

; 64-bit unsigned divide whose operands are both masked to their low 32 bits
; (4294967295 == 0xffffffff). The expected code is straight-line and writes
; zero to the high result word.
define i64 @udiv64_known32(i64 %a, i64 %b) {
; GFX9-LABEL: udiv64_known32:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, v2
; GFX9-NEXT:    v_cvt_f32_u32_e32 v0, v0
; GFX9-NEXT:    v_rcp_iflag_f32_e32 v2, v1
; GFX9-NEXT:    v_mul_f32_e32 v2, v0, v2
; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
; GFX9-NEXT:    v_cvt_u32_f32_e32 v3, v2
; GFX9-NEXT:    v_mad_f32 v0, -v2, v1, v0
; GFX9-NEXT:    v_cmp_ge_f32_e64 vcc, |v0|, v1
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, 0, v3, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %a.mask = and i64 %a, 4294967295
  %b.mask = and i64 %b, 4294967295
  %d = udiv i64 %a.mask, %b.mask
  ret i64 %d
}